library(knitr)
library(ggplot2)
library(stringr)
suppressMessages(library(dplyr))
library(sp)
suppressMessages(library(rgdal))
suppressMessages(library(dismo))
suppressMessages(library(stargazer))
library(leaflet)
library(XML)
suppressMessages(library(maptools))
library(automap)
suppressMessages(library(RStata))
suppressMessages(library(fields))
library(gstat)
library(htmltools)
suppressMessages(library(Matching))
wd <- "/Users/mlowes/drive/optimized_agronomy/soil/soil_health_study/data/ke baseline"
dd <- paste(wd, "data", sep = "/")
od <- paste(wd, 'output', sep = "/")

load(paste("/Users/mlowes/drive/R_help/3_analyze/regressions", "regression_functions.Rdata", sep = "/"))
d <- read.csv(paste(dd, "KE SHS Baseline Survey Data.csv", sep = '/'))
soil <- read.csv(paste(dd, "Kenya_SHS_results.csv", sep = "/"))
cnP <- read.csv(paste(dd, "Kenya_SHS_N_C.csv", sep = "/"))
Identifiers <- read.csv(paste(dd, "Combined meta with SSN.csv", sep = '/'), stringsAsFactors = F)

First step is merging the data. The ids in the Identifiers data need to be adjusted so that they match the ids in the ids in CommCare

Identifiers$oaf.ID <- tolower(Identifiers$oaf.ID)
Identifiers$DISTRICT[Identifiers$DISTRICT=="Kakamega North"] <- "Kakamega B (North)"
Identifiers$DISTRICT[Identifiers$DISTRICT=="Kakamega South"] <- "Kakamega (South)"

d$id.DistrictName <- as.character(d$id.DistrictName)
d$id.DistrictName[d$id.DistrictName=="GemLundha"] <- "Gem"
d$id.DistrictName[d$id.DistrictName=="Lug Ari"] <- "Lugari"

d$id.Soil_Sample_Id <- tolower(d$id.Soil_Sample_Id)

Identifiers$sample_id <- ifelse(grepl("c", Identifiers$oaf.ID), Identifiers$oaf.ID, paste(tolower(Identifiers$DISTRICT), Identifiers$oaf.ID, sep = ""))

Identifiers$sample_id <- gsub(" ", "", Identifiers$sample_id)
d$id.Soil_Sample_Id <- gsub(" ", "", d$id.Soil_Sample_Id)
table(d$id.Soil_Sample_Id %in% Identifiers$sample_id)
## 
## FALSE  TRUE 
##    35  2033

We’re currently missing 35 matches. Investigate why we’re missing these. Below are the ids that do not appear in the Identifiers data but appear in the CommCare survey data:

d$id.Soil_Sample_Id[!d$id.Soil_Sample_Id %in% Identifiers$sample_id]
##  [1] "busia33787"            "rachuonyo40077"       
##  [3] "kakamegab(north)15878" "rongo33010"           
##  [5] "c1136"                 "rongo35982"           
##  [7] "gem45918"              "gem64358"             
##  [9] "c857"                  "rachuonyo42533"       
## [11] "rachuonyo39005"        "kakamegab(north)6413" 
## [13] "kakamegab(north)61463" "butere49984"          
## [15] "kisii43004"            "gem57424"             
## [17] "c703"                  "c290"                 
## [19] "busia34175"            "c241"                 
## [21] "webuye52641"           "c863"                 
## [23] "c706"                  "migori17557"          
## [25] "migori14465"           "rachuonyo16330"       
## [27] "matete54668"           "suneka30533"          
## [29] "c1121"                 "suneka16515"          
## [31] "lugari52284"           "kakamegab(north)63510"
## [33] "gem59393"              "rachuonyo22477"       
## [35] "c1176"
missing <- d$id.Soil_Sample_Id[!d$id.Soil_Sample_Id %in% Identifiers$sample_id]

Issues seem to be:

missingAll <- d[!d$id.Soil_Sample_Id %in% Identifiers$sample_id,]
write.csv(missingAll, paste(od, "missing_matches.csv", sep = "/"), row.names = F)

For the ids that should have an OAF id but don’t see if that oaf id exists in the Identifiers data.

missingId <- str_extract_all(missing,"\\(?[0-9,.]+\\)?")
missingId <- unlist(missingId)
table(missingId %in% Identifiers$oaf.ID)
## 
## FALSE  TRUE 
##    33     2
missingId[missingId %in% Identifiers$oaf.ID]
## [1] "61463" "63510"

These are the ids that appear in the Identifiers data but not in the CommCare surey data:

Identifiers$sample_id[!Identifiers$sample_id %in%  d$id.Soil_Sample_Id]
##  [1] "busia33789"            "kakamegab(north)15873"
##  [3] "butere44824"           "butere56493"          
##  [5] "butere59600"           "gem4598"              
##  [7] "gem37424"              "gem64558"             
##  [9] "c328"                  "c280"                 
## [11] "webuye1233"            "rachuonyo224777"      
## [13] "rachuonyo38459"        "rachuonyo163330"      
## [15] "rachuonyo425333"       "rachuonyo39009"       
## [17] "rachuonyo400777"       "suneka16575"          
## [19] "c1174"                 "rongo3310"            
## [21] "kisii3004"             "c627"                 
## [23] "migori7557"            "migori14342"          
## [25] "migori8936"            "migori14159"          
## [27] "migori7179"            "migori17315"          
## [29] "migori13503"           "c901"                 
## [31] "c894"                  "c892"                 
## [33] "c893"                  "c861"                 
## [35] "c862"                  "c895"                 
## [37] "c896"

Merge the data and drop non-merged obs

Come back to this. I don’t want to simply drop observations but we want to be working with full data for summaries and regressions.

d <- merge(d, Identifiers[,c("SSN", "sample_id")], by.x="id.Soil_Sample_Id", by.y="sample_id", all.x=TRUE)

d <- d[!is.na(d$SSN),]

Merge in the soil data

table(d$SSN %in% soil$SSN)
## 
## FALSE  TRUE 
##     1  2035
d <- merge(d, soil[,c(1,3,6:24)], by="SSN", all.x=TRUE)

Merge C and N predictions

table(d$SSN %in% cnP$SSN)
## 
## FALSE  TRUE 
##     1  2035
d <- merge(d, cnP, by="SSN", all.x=TRUE)
names(d)[names(d)=="OC_PLS1" | names(d)== "TN_PLS1"] <- c("Total.C", "Total.N")

Cleaning baseline variables

# take out weird CommCare stuff
d[d=="---"] <- NA

names(d) <- gsub("id.", "", names(d))
names(d) <- gsub("livestock.", "", names(d))
# seedtype to yield
names(d)[24:27] <- gsub("plot_information.", "", names(d)[24:27])

#inputs
names(d)[34:65] <- gsub("plot_information.", "", names(d)[34:65])

#intercrop
names(d)[28:33] <- gsub("plot_information.plot_information.","intercrop.",  names(d)[28:33])

names(d) <- gsub("historical.", "", names(d))
names(d) <- gsub("field_information.", "", names(d))

Recode variables to numeric

varlist <- c("seedkgs", "yield", "intercrop.seedkgs", "intercrop.yield",
             "inputs.dap_kg", "inputs.can_kg", "inputs.npk_kg", "inputs.urea_kg", "inputs.dap_kg_intercrop", "inputs.can_kg_intercrop",
             "inputs.npk_kg_intercrop", "inputs.urea_kg_intercrop",
             "inputs.lime_kg")

d[, varlist] <- sapply(d[,varlist], as.numeric)
d <- cbind(d, str_split_fixed(d$gps, " ", n=4))
names(d)[names(d)=="1" |names(d)== "2" | names(d)== "3" | names(d)== "4"] <- c("lat", "lon", "alt", "precision")
d[,c("lat", "lon", "alt", "precision")] <- sapply(
  d[,c("lat", "lon", "alt", "precision")],                                          function(x){as.numeric(as.character(x))})

Count the number of missing values in all soil variables

soilVars <- c("C.E.C", "Cu", "EC", "Exch.Al", "Hp", "K", "Mg", "Mn",
              "pH", "B", "Ca", "Fe", "Na", "P", "PSI", "S", "Zn", "Total.C",
              "Total.N")

naCount <- sapply(d[,soilVars], function(x){
  return(sum(is.na(x)))
}) 
naCount
##   C.E.C      Cu      EC Exch.Al      Hp       K      Mg      Mn      pH 
##       1       1       1       1       1       1       1       1       1 
##       B      Ca      Fe      Na       P     PSI       S      Zn Total.C 
##       1       1       1       1       1       1       1       1       1 
## Total.N 
##       1

Drop data with missing soil data - it’s not useful for later summaries and regressions

note: Come back and understand why we don’t have perfect matches.

d <- d[-which(is.na(d$Ca)),]
summary(d[,soilVars])
##      C.E.C              Cu                EC            Exch.Al        
##  Min.   : 2.614   Min.   : 0.8157   Min.   : 22.90   Min.   :0.002227  
##  1st Qu.: 7.047   1st Qu.: 2.1852   1st Qu.: 38.72   1st Qu.:0.049040  
##  Median : 9.567   Median : 3.0869   Median : 46.07   Median :0.085674  
##  Mean   :10.420   Mean   : 3.2745   Mean   : 47.52   Mean   :0.128068  
##  3rd Qu.:13.088   3rd Qu.: 4.2528   3rd Qu.: 54.15   3rd Qu.:0.142481  
##  Max.   :40.192   Max.   :10.5906   Max.   :101.09   Max.   :2.720619  
##        Hp                K                Mg               Mn        
##  Min.   :0.03122   Min.   : 35.39   Min.   : 31.16   Min.   : 40.01  
##  1st Qu.:0.22529   1st Qu.: 94.18   1st Qu.:108.72   1st Qu.:111.81  
##  Median :0.30606   Median :126.93   Median :157.62   Median :154.54  
##  Mean   :0.35452   Mean   :142.42   Mean   :181.78   Mean   :157.50  
##  3rd Qu.:0.42156   3rd Qu.:182.19   3rd Qu.:239.80   3rd Qu.:198.95  
##  Max.   :3.32301   Max.   :550.83   Max.   :579.63   Max.   :340.54  
##        pH              B                 Ca               Fe       
##  Min.   :4.379   Min.   :0.01646   Min.   : 138.8   Min.   : 58.5  
##  1st Qu.:5.573   1st Qu.:0.09394   1st Qu.: 628.2   1st Qu.:112.9  
##  Median :5.858   Median :0.14120   Median : 916.3   Median :132.2  
##  Mean   :5.876   Mean   :0.15279   Mean   :1064.3   Mean   :136.7  
##  3rd Qu.:6.149   3rd Qu.:0.19316   3rd Qu.:1303.7   3rd Qu.:156.7  
##  Max.   :8.784   Max.   :0.67119   Max.   :8246.8   Max.   :345.6  
##        Na              P                  PSI               S         
##  Min.   :17.59   Min.   :  0.05591   Min.   : 21.14   Min.   : 5.931  
##  1st Qu.:27.85   1st Qu.:  3.53380   1st Qu.: 87.12   1st Qu.:10.754  
##  Median :30.76   Median :  7.06593   Median :119.79   Median :12.646  
##  Mean   :31.53   Mean   : 11.98246   Mean   :125.76   Mean   :13.018  
##  3rd Qu.:34.56   3rd Qu.: 13.34762   3rd Qu.:157.05   3rd Qu.:14.852  
##  Max.   :58.39   Max.   :198.04271   Max.   :369.55   Max.   :29.903  
##        Zn            Total.C          Total.N       
##  Min.   : 1.682   Min.   :0.7858   Min.   :0.05101  
##  1st Qu.: 3.639   1st Qu.:1.7646   1st Qu.:0.12616  
##  Median : 4.511   Median :2.1194   Median :0.15722  
##  Mean   : 4.757   Mean   :2.1665   Mean   :0.15790  
##  3rd Qu.: 5.653   3rd Qu.:2.5581   3rd Qu.:0.18766  
##  Max.   :12.462   Max.   :4.5113   Max.   :0.31267

Check SiteName variable

#table(d$SiteName) many misspellings
d$SiteName <- tolower(d$SiteName)

Graphs of Kenya baseline soil variables

for(i in 1:length(soilVars)){
print(
  ggplot(data=d, aes(x=as.factor(oaf), y=d[,soilVars[i]])) + 
    geom_boxplot() +
    labs(x="One Acre Fund Farmer", y=soilVars[i], title = paste("Kenya baseline soil - ", soilVars[i], sep = ""))
  )  
}

pdf(paste(od, "ke_baseline_soil.pdf", sep = "/"), width=11, height=8.5)
print(
  ggplot(data=d, aes(x=as.factor(oaf), y=d[,soilVars[i]])) + 
    geom_boxplot() +
    labs(x="One Acre Fund Farmer", y=soilVars[i], title = paste("Kenya baseline soil - ", soilVars[i], sep = ""))
  )  
dev.off()
## quartz_off_screen 
##                 2

Soil chemical relationships

There are biologically predictable relationships between soil chemical characteristics. For instance, we expect Ca and Mg to move in the same direction and be positively correlated with pH. If we had Aluminum as an outcome, we’d expect pH to be negatively correlated with soluable aluminum. Let’s look quickly to confirm if those relationships are present:

rel1 <- ggplot(d, aes(x=Ca, y=Mg)) + geom_point() +
    stat_smooth(method = "loess") + 
    labs(x = "Calcium", y= "Magnesium", title="Calcium/Magnesium relationship")
print(rel1)

rel2 <- ggplot(d, aes(x=pH, y=Ca)) + geom_point() +
  stat_smooth(method = "loess") +
  labs(x = "pH", y="Calcium", title = "pH and Calcium relationship")
print(rel2)

rel3 <- ggplot(d, aes(x=pH, y=Exch.Al)) + geom_point() +
  stat_smooth(method = "loess") +
  labs(x = "pH", y="Exchangable Aluminum", title = "pH and Exch.Al relationship")
print(rel3)

rel4 <- ggplot(d, aes(x=Total.C, y=Total.N)) + geom_point() + 
  stat_smooth(method="loess") +
  labs(x = "Total Carbon", y="Total Nitrogen", title = "Carbon and Nitrogen relationship")


pdf(paste(od, "ke_baseline_soil_relationships.pdf", sep = "/"), width=11, height=8.5)
print(rel1) 
print(rel2)
print(rel3)
print(rel4)
dev.off()
## quartz_off_screen 
##                 2

Interpretation We see the predictable soil relationships we expected. This indicates that internally, our soil data are behaving in compliance with biological principles.

Save clean demographic and soil data to external file

write.csv(d, file=paste(dd, "shs ke baseline.csv", sep = "/"))
save(d, file=paste(dd, "shs ke baseline.Rdata", sep = "/"))

Summary statistics

Table of final baseline breakdown

count <- d %>% group_by(DistrictName) %>% 
  dplyr::summarize(
    t.count = sum(ifelse(oaf==1,1,0)),
    c.count = sum(ifelse(oaf==0,1,0)),
    total = n()
  ) %>% ungroup()

count <- as.data.frame(count)
write.csv(count, file=paste(od, "final ke sample breakdown.csv", sep="/"), row.names=F)
as.data.frame(count)
##          DistrictName t.count c.count total
## 1               Busia      43      45    88
## 2              Butere     116     119   235
## 3              Chwele      40      39    79
## 4                 Gem     134     135   269
## 5    Kakamega (South)      91      89   180
## 6  Kakamega B (North)      82      84   166
## 7               Kisii      47      50    97
## 8              Lugari      88      86   174
## 9              Matete      46      47    93
## 10             Migori      86      85   171
## 11          Rachuonyo      84      90   174
## 12              Rongo      78      78   156
## 13             Suneka      34      35    69
## 14             Webuye      42      42    84
d$valley <- ifelse(d$location=="valley", 1,0)
d$hilltop <- ifelse(d$location=="hilltop", 1,0)
d$hillside <- ifelse(d$location=="hillside", 1,0)
sort(prop.table(table(d$plot_information.maincrop, useNA = 'ifany')))
## 
##        sukuma    groundnuts        millet sweetpotatoes         beans 
##   0.001965602   0.002948403   0.002948403   0.003931204   0.004422604 
##       sorghum         sugar         other        fallow         maize 
##   0.005405405   0.006879607   0.017199017   0.023095823   0.931203931
d$crop.maize <- ifelse(d$plot_information.maincrop=="maize", 1,0)

Kgs of inputs per acre and hectare

Notes from Kalie: We should be dividing input quantity by land size. 100 kgs of inputs is the soft ceiling for OAF fields as we didn’t offer more than 100 kgs of fertilizer per acre. It’s also impossible for an OAF field to be smaller than 0.25 acres.

ggplot(d, aes(x=field_size, y=inputs.dap_kg)) + geom_point()
## Warning: Removed 479 rows containing missing values (geom_point).

This would mean however that if I want to get the actual amount they applied to their field to see that that quantity also makes sense, I would multiply by the size of the field.

d$dap.check <- d$inputs.dap_kg*d$field_size
ggplot(d, aes(x=dap.check)) + geom_density()
## Warning: Removed 479 rows containing non-finite values (stat_density).

Multiplying certainly keeps us in a more normal range. The scatter plot looks okay as well. The variance in per acre application rate increases with the size of the plot.

ggplot(d, aes(x=field_size, y=dap.check)) + geom_point()
## Warning: Removed 479 rows containing missing values (geom_point).

However, if the input quatity variable is already per plot size, as the codebook suggests, I’d want to divide by the size of the plot to get the per acre application rate. This is what Kalie suggests

d$dap.acre <- d$inputs.dap_kg/d$field_size
d$can.acre <- d$inputs.can_kg/d$field_size
d$npk.acre <- d$inputs.npk_kg/d$field_size
d$urea.acre <- d$inputs.urea_kg/d$field_size

d$inputs.compost <- ifelse(d$inputs.compost==-99, NA, d$inputs.compost)
d$compost.acre <- d$inputs.compost/d$field_size
acreInputs <- names(d)[grep("acre", names(d))]

Print out the calculation to make certain I’m doing this right:

#head(d[, c("inputs.dap_kg", "field_size", "dap.acre")])
d[which.max(d$dap.acre), c("inputs.dap_kg", "field_size", "dap.acre", "oaf")]
##     inputs.dap_kg field_size dap.acre oaf
## 831            47     0.0625      752   0
for(i in 1:length(acreInputs)){
  print(
  ggplot(data=d, aes(x=d[,acreInputs[i]])) +
    geom_histogram() +
    facet_wrap(~ oaf, scales='free') +
    labs(x=acreInputs[i], title = paste("Kenya input quantity per acre - ", acreInputs[i], sep = ""))
  )  
}
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 479 rows containing non-finite values (stat_bin).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1099 rows containing non-finite values (stat_bin).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1999 rows containing non-finite values (stat_bin).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1822 rows containing non-finite values (stat_bin).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 44 rows containing non-finite values (stat_bin).

These values are totally unreasonable! These per acre (and per hectare) values seem too big. Let’s look at the per acre rates by field size

for(i in 1:length(acreInputs)){
print(
    ggplot(d, aes(x = field_size, y=d[, acreInputs[i]])) + 
    geom_point() + 
    labs(x = "Field Size in Acres", y=acreInputs[i], title = paste("Kenya input quantity by acreage - ", acreInputs[i], sep = ""))
  )
}
## Warning: Removed 479 rows containing missing values (geom_point).

## Warning: Removed 1099 rows containing missing values (geom_point).

## Warning: Removed 1999 rows containing missing values (geom_point).

## Warning: Removed 1822 rows containing missing values (geom_point).

## Warning: Removed 44 rows containing missing values (geom_point).

Cleaning input variables

d$field_size.adj <- ifelse(d$oaf==1 & d$field_size<0.25, 0.25, d$field_size)

# drop really small field sizes >> they're inflating the application rates
d$field_size.adj <- ifelse(d$field_size<0.125, 0.125, d$field_size.adj)


# updated version of the input/acre variables - winsoring values to 100 kg.
d$dap.acre <- d$inputs.dap_kg/d$field_size.adj
d$dap.acre <- ifelse(d$dap.acre>100, 100, d$dap.acre)

d$can.acre <- d$inputs.can_kg/d$field_size.adj
d$can.acre <- ifelse(d$can.acre>100, 100, d$can.acre)

d$npk.acre <- d$inputs.npk_kg/d$field_size.adj
d$npk.acre <- ifelse(d$npk.acre>100, 100, d$npk.acre)

d$urea.acre <- d$inputs.urea_kg/d$field_size.adj
d$urea.acre <- ifelse(d$urea.acre>100, 100, d$urea.acre)

d$compost.acre <- d$inputs.compost/d$field_size.adj

Clean all per acre variables

summary(d[,acreInputs])
##     dap.acre         can.acre         npk.acre        urea.acre     
##  Min.   :  2.50   Min.   :  1.50   Min.   :  1.50   Min.   :  2.00  
##  1st Qu.: 30.67   1st Qu.: 25.33   1st Qu.: 10.30   1st Qu.: 22.00  
##  Median : 46.00   Median : 42.00   Median : 19.00   Median : 34.00  
##  Mean   : 53.76   Mean   : 46.49   Mean   : 29.44   Mean   : 42.07  
##  3rd Qu.: 92.00   3rd Qu.: 68.00   3rd Qu.: 40.00   3rd Qu.: 60.00  
##  Max.   :100.00   Max.   :100.00   Max.   :100.00   Max.   :100.00  
##  NA's   :479      NA's   :1099     NA's   :1999     NA's   :1822    
##   compost.acre    
##  Min.   :    0.0  
##  1st Qu.:    0.0  
##  Median :    0.0  
##  Mean   :  240.5  
##  3rd Qu.:  120.0  
##  Max.   :53333.3  
##  NA's   :44
for(i in 1:length(acreInputs)){
print(
    ggplot(d, aes(x = field_size.adj, y=d[, acreInputs[i]], color=as.factor(oaf))) + 
    geom_point() + 
    #facet_wrap(~oaf) +
    labs(x = "Field Size in Acres", y=acreInputs[i], title = paste("Kenya input quantity by acreage - ", acreInputs[i], sep = ""))
)
}
## Warning: Removed 479 rows containing missing values (geom_point).

## Warning: Removed 1099 rows containing missing values (geom_point).

## Warning: Removed 1999 rows containing missing values (geom_point).

## Warning: Removed 1822 rows containing missing values (geom_point).

## Warning: Removed 44 rows containing missing values (geom_point).

Kim says: While the values vary far more than we see in typical M&E data, we don’t have a good reference point for fertilizer application rates during the short rains, the period about which we asked. It’s conceivable that the fertilizer/acre quantity varies this much, even among OAF farmers. I’m going to go with that for now.

Cleaning management practices in past 5 years

pastYears <- names(d)[grep("_5",  names(d))]
summary(d[,pastYears])
##  chemical_fertilizer_5 compost_fertilizer_5 lime_fertilizer_5 
##  Min.   :-99.000       Min.   :-99.000      Min.   :-99.0000  
##  1st Qu.:  2.000       1st Qu.:  0.000      1st Qu.:  0.0000  
##  Median :  5.000       Median :  1.000      Median :  0.0000  
##  Mean   :  3.211       Mean   :  1.676      Mean   : -0.1169  
##  3rd Qu.:  5.000       3rd Qu.:  5.000      3rd Qu.:  0.0000  
##  Max.   :  5.000       Max.   :  5.000      Max.   :  5.0000  
##   fertilizer_5       legum_maincrop_5   legum_intercrop_5
##  Min.   :-99.00000   Min.   :-99.0000   Min.   :-99.00   
##  1st Qu.:  0.00000   1st Qu.:  0.0000   1st Qu.:  0.00   
##  Median :  0.00000   Median :  0.0000   Median :  3.00   
##  Mean   : -0.09042   Mean   :  0.1032   Mean   :  2.64   
##  3rd Qu.:  0.00000   3rd Qu.:  0.0000   3rd Qu.:  5.00   
##  Max.   :  5.00000   Max.   :  5.0000   Max.   :  5.00
# replace -99 with NA
d[,pastYears] <- sapply(d[,pastYears], function(x){
  ifelse(x==-99, NA, x)
})

Tenure Scatter plots

Generate tenure vs. pH and tenure vs. fertilizer seasons scatter plots:

d$seasons_oaf <- ifelse(d$seasons_oaf>8, NA, d$seasons_oaf)

tenure.ph <- ggplot(d, aes(x= jitter(seasons_oaf), y=pH)) + geom_point() +
  geom_smooth(method='loess') + 
  #geom_smooth(method='lm', color="red") + 
  #scale_x_discrete(=seq(0,8,1)) + 
  labs(x = "OAF Tenure", y = "Soil pH", title = "Kenya OAF Tenure vs. soil pH")

tenure.ph
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

pdf(paste(od, "tenure vs ph.pdf", sep = "/"), width=11, height=8.5)
print(tenure.ph)
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

## Warning: Removed 2 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen 
##                 2
d$seasons_breaks <- ifelse(d$seasons_oaf<2, 1, ifelse(d$seasons_oaf>=2 & d$seasons_oaf<5,2,3))

tenure.breaks <- ggplot(data=subset(d, !is.na(d$seasons_breaks)), aes(x=jitter(seasons_oaf), y=pH)) + 
  stat_smooth(method = "lm", se=FALSE, color="blue") +
  geom_point() +
  labs(title = "Kenya OAF Tenure vs. soil pH",
       x = "Seasons with OAF", y = "Soil pH") + 
  facet_wrap(~ seasons_breaks, scales="free_y")

tenure.breaks

tenure.fertilizer <- ggplot(d, aes(x= jitter(seasons_oaf), y=jitter(chemical_fertilizer_5))) + geom_point() +
  geom_smooth(method='loess') + 
  labs(x = "OAF Tenure", y = "Years of Fertilizer in past 5 years", title = "Kenya OAF Tenure vs. soil pH")

tenure.fertilizer
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

pdf(paste(od, "tenure vs fertilizer.pdf", sep = "/"), width=11, height=8.5)
print(tenure.fertilizer)
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen 
##                 2

Baseline balance

d$own.cows <- ifelse(d$cows>0 & !is.na(d$cows), 1,0)
d$own.goats <- ifelse(d$goats>0 & !is.na(d$goats), 1,0)
d$own.chickens <- ifelse(d$chickens>0 & !is.na(d$chickens), 1,0)
d$own.pigs <- ifelse(d$pigs>0 & !is.na(d$pigs), 1,0)
d$own.sheep <- ifelse(d$sheep>0 & !is.na(d$sheep), 1,0)

Soil type

d$clay_soil <- ifelse(d$soil_type=="clay", 1,0)
d$loam_soil <- ifelse(d$soil_type=="loam", 1,0)
d$sandy_soil <- ifelse(d$soil_type=="sandy", 1,0)

Let’s see how balanced our farmers are in terms of demographic variables. One Acre Fund farmers were selected based on (list criteria) and control farmers in the same area tha fit the same criteria were also selected. No matching process has been performed to identify the control farmers that most closely resemble the One Acre Fund farmers in the sample.

names(d)[names(d)=="gender"] <- "female"

out.list <- c("female", "age", "cows", "goats", "chickens",
              "pigs", "sheep", "field_size", "yield", "dap.acre", "can.acre", "npk.acre", "urea.acre", "chemical_fertilizer_5", "compost_fertilizer_5", "lime_fertilizer_5",
"fertilizer_5", "legum_maincrop_5", "legum_intercrop_5", soilVars, "valley", "hilltop", "hillside", "crop.maize", "alt", "own.cows", "own.goats", "own.chickens", "own.pigs", "own.sheep", "clay_soil", "loam_soil", "sandy_soil")

              
output <- do.call(rbind, lapply(out.list, function(x) {
  
  out <- t.test(d[,x] ~ d[,"oaf"], data=d)
  tab <- data.frame(out[[5]][[2]],out[[5]][[1]], out[3])
  tab[,1:2] <- round(tab[,1:2],3)
  names(tab) <- c(names(out[[5]]), "pvalue")
  return(tab)
}))

# use p.adjust with bonferroni correction
output$pvalue <- p.adjust(output$pvalue, method="fdr")
rownames(output) <- out.list
output <- output[order(output$pvalue),]
output$pvalue <- ifelse(output[, 3] < 0.001, "< 0.001", round(output[, 3], 3)) 
colnames(output) <- c("1AF Client","Control", "p-value")    
print(kable(output))
1AF Client Control p-value
yield 57.479 49.790 < 0.001
chickens 11.247 7.953 < 0.001
own.cows 0.760 0.655 < 0.001
cows 2.111 1.656 < 0.001
legum_intercrop_5 2.582 2.995 < 0.001
own.chickens 0.908 0.857 0.003
can.acre 43.905 50.311 0.006
dap.acre 51.401 56.489 0.009
B 0.147 0.158 0.014
Ca 1021.661 1106.482 0.014
age 46.187 44.361 0.015
pH 5.845 5.907 0.015
own.goats 0.233 0.186 0.031
goats 0.555 0.422 0.036
own.pigs 0.073 0.048 0.056
female 0.706 0.660 0.081
EC 46.941 48.092 0.082
pigs 0.199 0.109 0.106
chemical_fertilizer_5 3.552 3.376 0.106
own.sheep 0.173 0.144 0.174
C.E.C 10.257 10.582 0.215
Hp 0.362 0.347 0.215
sheep 0.468 0.375 0.245
K 140.274 144.533 0.273
S 13.122 12.916 0.273
crop.maize 0.923 0.939 0.273
lime_fertilizer_5 0.039 0.020 0.299
Mg 178.907 184.621 0.304
legum_maincrop_5 0.273 0.227 0.349
fertilizer_5 0.222 0.182 0.405
Exch.Al 0.132 0.124 0.537
field_size 0.540 0.526 0.633
Fe 137.280 136.182 0.707
Na 31.446 31.611 0.707
npk.acre 33.733 27.557 0.777
P 12.205 11.763 0.777
Mn 158.096 156.909 0.811
valley 0.718 0.729 0.811
hillside 0.257 0.248 0.811
clay_soil 0.073 0.079 0.811
PSI 126.270 125.265 0.838
loam_soil 0.731 0.725 0.908
compost_fertilizer_5 1.837 1.812 0.921
Total.C 2.170 2.163 0.921
hilltop 0.025 0.023 0.963
urea.acre 42.139 42.028 0.992
Cu 3.273 3.276 0.992
Zn 4.757 4.758 0.992
Total.N 0.158 0.158 0.992
alt 1268.062 1268.290 0.992
sandy_soil 0.196 0.196 0.992

Demographic variables:

Agricultural practice variables:

Soil Variables:

t10order <- c("age", "female")
table10vars <- paste(t10order, collapse="|")

ke.table10 <- output[grepl(table10vars, rownames(output)),]
ke.table10 <- ke.table10[order(match(rownames(ke.table10), t10order)),]
write.csv(ke.table10, file=paste(od, "pre match balance table10.csv", sep="/"),
          row.names = T)

t11order <- c("field_size", "alt", "hilltop", "hillside", "valley", "crop.maize")
table11vars <- paste(t11order, collapse="|")
ke.table11 <- output[grepl(table11vars, rownames(output)),]
ke.table11 <- ke.table11[order(match(rownames(ke.table11), t11order)),]
write.csv(ke.table11, file=paste(od, "pre match balance table 11.csv", sep = "/"),
          row.names=T)

t12order <- c("own.cows",  "cows", "own.pigs", "pigs", "own.sheep", "sheep", "own.goats", "goats", "own.chickens", "chickens")
table12vars <- paste(t12order, collapse="|")
ke.table12 <- output[grepl(table12vars, rownames(output)),]
ke.table12 <- ke.table12[order(match(rownames(ke.table12), t12order)),]
write.csv(ke.table12, file=paste(od, "pre match balance table 12.csv", sep = "/"),
          row.names=T)
#write table
write.csv(output, file=paste(od, "ke baseline balance.csv", sep="/"), row.names=T)

Baseline balance by district

I remove intercrop elements from out.list so that the code runs properly. I have to remove all fertilizer comparisons. CAN fails in Suneka. The others fail across districts due to insuffient observations.

toMatch <- c("npk", "urea", "g_intercrop", "can")
distList <- out.list[!out.list %in% unique(grep(paste(toMatch, collapse="|"), out.list, value=TRUE))]


dist.output <- do.call(rbind, lapply(split(d, d$DistrictName), function(x) {
  
  tab <- do.call(rbind, lapply(distList, function(y) {
    out <- t.test(x[,y] ~ x[,"oaf"], data=x)
    tab <- data.frame(out[[5]][[2]], out[[5]][[1]], out[3])
    tab[,1:2] <- round(tab[,1:2],3)
    names(tab) <- c(names(out[[5]]), "pvalue")
    return(tab)
  }))
  
  return(data.frame(district = unique(x$DistrictName), tab))
}))

rownames(dist.output) <- NULL
dist.output$variable <- rep(distList,length(unique(d$DistrictName)))    

# order variables 
dist.output <- dist.output[, c(1, 5, 2:4)]
dist.output$pvalue <- p.adjust(dist.output$pvalue, method="fdr")
dist.output <- dist.output[order(dist.output$pvalue),]

dist.output$pvalue <- ifelse(dist.output$pvalue < 0.001, "< 0.001", round(dist.output$pvalue,3))
colnames(dist.output) <- c("District", "Varible", "1AF Client","Control", "p-value")    
print(kable(dist.output))
District Varible 1AF Client Control p-value
211 Kakamega (South) EC 38.026 44.894 < 0.001
272 Kakamega B (North) S 11.420 10.092 < 0.001
496 Rachuonyo legum_intercrop_5 1.202 2.478 < 0.001
221 Kakamega (South) Na 30.142 32.164 < 0.001
225 Kakamega (South) Zn 4.423 5.126 < 0.001
214 Kakamega (South) K 104.804 131.847 0.006
209 Kakamega (South) C.E.C 8.146 9.899 0.009
259 Kakamega B (North) EC 46.863 54.240 0.009
656 Webuye S 12.120 10.849 0.01
569 Rongo own.cows 0.821 0.551 0.016
215 Kakamega (South) Mg 136.679 168.831 0.018
224 Kakamega (South) S 16.017 14.313 0.018
547 Rongo EC 61.226 54.433 0.019
57 Butere yield 60.086 48.496 0.022
217 Kakamega (South) pH 5.714 5.945 0.022
266 Kakamega B (North) B 0.152 0.198 0.022
58 Butere dap.acre 48.163 65.737 0.022
219 Kakamega (South) Ca 777.513 1002.114 0.026
201 Kakamega (South) yield 60.187 47.404 0.031
297 Kisii yield 51.022 35.080 0.031
273 Kakamega B (North) Zn 4.207 4.785 0.033
218 Kakamega (South) B 0.106 0.146 0.039
608 Suneka S 12.862 14.286 0.041
99 Chwele cows 2.300 1.051 0.06
513 Rachuonyo Zn 5.316 4.501 0.063
595 Suneka EC 54.096 47.497 0.063
91 Butere own.chickens 0.931 0.798 0.067
137 Chwele own.cows 0.800 0.487 0.079
208 Kakamega (South) legum_intercrop_5 3.511 4.247 0.087
107 Chwele chemical_fertilizer_5 4.100 2.974 0.091
265 Kakamega B (North) pH 5.829 6.055 0.092
381 Lugari own.sheep 0.364 0.174 0.092
531 Rongo cows 2.372 1.487 0.092
257 Kakamega B (North) C.E.C 9.470 11.305 0.103
488 Rachuonyo field_size 0.497 0.396 0.115
592 Suneka legum_intercrop_5 3.324 4.343 0.117
270 Kakamega B (North) P 10.114 14.049 0.135
598 Suneka K 186.138 156.538 0.135
256 Kakamega B (North) legum_intercrop_5 2.805 3.583 0.135
533 Rongo chickens 9.897 5.551 0.135
593 Suneka C.E.C 12.912 11.025 0.135
39 Busia crop.maize 0.767 0.956 0.167
269 Kakamega B (North) Na 28.847 30.348 0.167
341 Lugari chickens 13.341 8.767 0.167
504 Rachuonyo Mn 207.654 183.951 0.167
82 Butere Total.C 1.882 2.030 0.169
262 Kakamega B (North) K 124.058 144.309 0.173
87 Butere crop.maize 0.948 1.000 0.173
153 Gem yield 53.281 45.119 0.173
249 Kakamega B (North) yield 69.341 59.321 0.173
298 Kisii dap.acre 45.537 63.035 0.173
343 Lugari sheep 1.091 0.442 0.173
560 Rongo S 11.638 12.507 0.173
584 Suneka field_size 0.445 0.305 0.173
586 Suneka dap.acre 53.032 72.151 0.173
263 Kakamega B (North) Mg 150.277 178.955 0.181
654 Webuye P 6.752 10.124 0.182
355 Lugari EC 46.502 50.408 0.184
151 Gem sheep 0.761 0.333 0.2
210 Kakamega (South) Cu 3.401 3.705 0.2
243 Kakamega B (North) cows 2.354 1.702 0.2
602 Suneka B 0.178 0.149 0.2
611 Suneka Total.N 0.191 0.175 0.2
222 Kakamega (South) P 4.821 6.378 0.204
233 Kakamega (South) own.cows 0.824 0.674 0.204
362 Lugari B 0.169 0.200 0.208
643 Webuye EC 42.309 47.568 0.211
603 Suneka Ca 1197.599 988.904 0.232
267 Kakamega B (North) Ca 952.727 1186.483 0.235
443 Migori chemical_fertilizer_5 2.826 2.106 0.237
245 Kakamega B (North) chickens 10.683 7.702 0.238
446 Migori fertilizer_5 0.081 0.271 0.245
653 Webuye Na 27.217 29.037 0.249
189 Gem own.sheep 0.246 0.141 0.251
532 Rongo goats 1.026 0.551 0.251
5 Busia chickens 17.093 8.178 0.254
339 Lugari cows 2.045 1.291 0.254
188 Gem own.pigs 0.052 0.007 0.261
599 Suneka Mg 230.982 196.650 0.261
482 Rachuonyo age 48.381 44.611 0.266
98 Chwele age 46.625 39.410 0.27
9 Busia yield 58.244 46.023 0.305
489 Rachuonyo yield 52.155 45.178 0.305
570 Rongo own.goats 0.397 0.244 0.308
53 Butere chickens 10.879 7.941 0.309
90 Butere own.goats 0.207 0.109 0.309
650 Webuye B 0.158 0.192 0.323
380 Lugari own.pigs 0.045 0.000 0.329
587 Suneka chemical_fertilizer_5 4.118 4.771 0.329
399 Matete legum_maincrop_5 1.283 0.681 0.331
241 Kakamega B (North) female 0.720 0.571 0.332
158 Gem fertilizer_5 0.493 0.222 0.335
610 Suneka Total.C 2.642 2.425 0.335
360 Lugari Mn 119.181 108.006 0.361
150 Gem pigs 0.150 0.007 0.362
59 Butere chemical_fertilizer_5 3.313 2.765 0.368
197 Kakamega (South) chickens 8.890 6.820 0.368
558 Rongo P 35.577 24.612 0.375
342 Lugari pigs 0.068 0.000 0.377
149 Gem chickens 11.739 8.400 0.38
236 Kakamega (South) own.pigs 0.231 0.124 0.387
561 Rongo Zn 6.361 5.807 0.387
83 Butere Total.N 0.133 0.140 0.4
89 Butere own.cows 0.741 0.630 0.414
377 Lugari own.cows 0.693 0.558 0.414
284 Kakamega B (North) own.pigs 0.110 0.036 0.419
11 Busia chemical_fertilizer_5 3.143 2.333 0.425
293 Kisii chickens 9.106 6.020 0.449
2 Busia age 44.465 39.222 0.461
183 Gem crop.maize 0.910 0.963 0.461
231 Kakamega (South) crop.maize 1.000 0.966 0.488
121 Chwele pH 6.040 6.306 0.49
247 Kakamega B (North) sheep 0.195 0.048 0.502
35 Busia Total.N 0.174 0.160 0.503
213 Kakamega (South) Hp 0.497 0.432 0.503
246 Kakamega B (North) pigs 0.244 0.048 0.503
434 Migori age 47.512 43.529 0.503
52 Butere goats 0.431 0.235 0.507
441 Migori yield 54.430 47.659 0.517
509 Rachuonyo Na 35.768 34.687 0.517
139 Chwele own.chickens 0.875 0.974 0.521
483 Rachuonyo cows 2.155 1.689 0.522
34 Busia Total.C 2.162 2.012 0.577
155 Gem chemical_fertilizer_5 3.254 3.622 0.577
437 Migori chickens 12.163 9.153 0.577
261 Kakamega B (North) Hp 0.311 0.267 0.578
285 Kakamega B (North) own.sheep 0.098 0.036 0.578
370 Lugari Total.C 2.288 2.164 0.58
123 Chwele Ca 851.837 1296.267 0.58
216 Kakamega (South) Mn 179.496 187.604 0.58
385 Matete female 0.804 0.660 0.58
400 Matete legum_intercrop_5 3.022 3.553 0.58
77 Butere Na 28.870 29.641 0.588
484 Rachuonyo goats 0.905 0.567 0.588
448 Migori legum_intercrop_5 0.721 1.106 0.592
43 Busia own.chickens 0.930 0.822 0.597
252 Kakamega B (North) compost_fertilizer_5 1.866 1.386 0.597
291 Kisii cows 1.851 1.400 0.609
368 Lugari S 11.077 10.692 0.61
492 Rachuonyo compost_fertilizer_5 0.952 1.356 0.61
498 Rachuonyo Cu 4.410 4.093 0.61
1 Busia female 0.558 0.711 0.63
78 Butere P 11.216 13.972 0.63
350 Lugari fertilizer_5 0.276 0.094 0.63
605 Suneka Na 34.168 32.524 0.63
666 Webuye own.goats 0.143 0.048 0.63
145 Gem female 0.761 0.681 0.642
475 Migori own.chickens 0.953 0.894 0.642
628 Webuye goats 0.262 0.071 0.642
426 Matete own.goats 0.217 0.106 0.652
85 Butere hilltop 0.000 0.017 0.654
115 Chwele EC 45.541 49.370 0.654
179 Gem Total.N 0.146 0.152 0.654
194 Kakamega (South) age 48.769 51.955 0.654
258 Kakamega B (North) Cu 2.469 2.682 0.654
510 Rachuonyo P 10.026 7.528 0.654
635 Webuye chemical_fertilizer_5 4.262 3.762 0.654
649 Webuye pH 5.823 5.987 0.654
160 Gem legum_intercrop_5 3.216 3.548 0.655
163 Gem EC 39.355 38.185 0.655
655 Webuye PSI 125.782 113.467 0.668
352 Lugari legum_intercrop_5 3.885 4.224 0.674
75 Butere Ca 672.203 733.903 0.674
307 Kisii EC 51.686 53.946 0.674
379 Lugari own.chickens 0.898 0.826 0.674
281 Kakamega B (North) own.cows 0.866 0.786 0.681
323 Kisii Total.N 0.222 0.229 0.681
435 Migori cows 2.709 2.141 0.681
508 Rachuonyo Fe 135.521 128.348 0.681
185 Gem own.cows 0.672 0.593 0.69
388 Matete goats 0.543 0.255 0.696
113 Chwele C.E.C 7.433 9.089 0.7
173 Gem Na 29.773 29.227 0.7
391 Matete sheep 0.087 0.319 0.7
521 Rachuonyo own.cows 0.798 0.711 0.7
646 Webuye K 117.889 132.467 0.7
651 Webuye Ca 875.015 1030.262 0.7
369 Lugari Zn 3.924 3.713 0.707
73 Butere pH 5.530 5.604 0.711
253 Kakamega B (North) lime_fertilizer_5 0.085 0.000 0.711
503 Rachuonyo Mg 265.564 246.873 0.711
516 Rachuonyo valley 0.560 0.656 0.711
601 Suneka pH 5.772 5.659 0.711
632 Webuye field_size 0.545 0.653 0.711
198 Kakamega (South) pigs 0.769 0.337 0.732
493 Rachuonyo lime_fertilizer_5 0.107 0.033 0.732
16 Busia legum_intercrop_5 2.279 2.800 0.734
60 Butere compost_fertilizer_5 2.276 1.933 0.734
127 Chwele PSI 58.827 51.199 0.734
458 Migori B 0.188 0.174 0.734
464 Migori S 12.367 12.757 0.734
609 Suneka Zn 5.966 5.538 0.734
481 Rachuonyo female 0.679 0.589 0.748
398 Matete fertilizer_5 0.239 0.532 0.753
70 Butere K 98.236 104.079 0.761
71 Butere Mg 123.018 131.436 0.761
554 Rongo B 0.164 0.149 0.761
331 Kisii own.chickens 0.872 0.780 0.765
428 Matete own.pigs 0.109 0.043 0.765
27 Busia Ca 969.547 1141.030 0.768
195 Kakamega (South) cows 1.967 1.685 0.768
320 Kisii S 14.119 13.761 0.768
383 Lugari loam_soil 0.977 0.942 0.768
403 Matete EC 43.147 45.389 0.768
147 Gem cows 1.970 1.659 0.771
235 Kakamega (South) own.chickens 0.901 0.843 0.771
128 Chwele S 9.807 9.423 0.781
200 Kakamega (South) field_size 0.544 0.485 0.781
42 Busia own.goats 0.256 0.156 0.782
178 Gem Total.C 1.811 1.868 0.784
28 Busia Fe 130.328 122.565 0.787
33 Busia Zn 5.128 4.779 0.787
37 Busia hilltop 0.000 0.022 0.787
51 Butere cows 2.017 1.748 0.787
61 Butere lime_fertilizer_5 0.017 0.000 0.787
63 Butere legum_maincrop_5 0.181 0.118 0.787
86 Butere hillside 0.009 0.000 0.787
100 Chwele goats 0.275 0.487 0.787
108 Chwele compost_fertilizer_5 1.950 1.436 0.787
117 Chwele Hp 0.243 0.206 0.787
133 Chwele hilltop 0.025 0.000 0.787
142 Chwele clay_soil 0.025 0.000 0.787
146 Gem age 47.642 45.556 0.787
193 Kakamega (South) female 0.736 0.663 0.787
205 Kakamega (South) lime_fertilizer_5 0.022 0.090 0.787
226 Kakamega (South) Total.C 2.035 2.106 0.787
242 Kakamega B (North) age 45.805 43.917 0.787
268 Kakamega B (North) Fe 124.110 120.553 0.787
275 Kakamega B (North) Total.N 0.157 0.151 0.787
277 Kakamega B (North) hilltop 0.012 0.000 0.787
282 Kakamega B (North) own.goats 0.280 0.214 0.787
302 Kisii fertilizer_5 0.106 0.240 0.787
311 Kisii Mg 275.137 290.799 0.787
314 Kisii B 0.210 0.225 0.787
338 Lugari age 46.148 43.942 0.787
346 Lugari dap.acre 48.967 44.422 0.787
349 Lugari lime_fertilizer_5 0.023 0.000 0.787
358 Lugari K 131.120 139.581 0.787
364 Lugari Fe 123.110 118.683 0.787
382 Lugari clay_soil 0.011 0.035 0.787
393 Matete yield 62.239 56.717 0.787
397 Matete lime_fertilizer_5 0.000 0.021 0.787
419 Matete Total.N 0.145 0.137 0.787
424 Matete alt 1546.540 1596.905 0.787
429 Matete own.sheep 0.065 0.128 0.787
433 Migori female 0.581 0.506 0.787
444 Migori compost_fertilizer_5 1.686 2.024 0.787
445 Migori lime_fertilizer_5 0.023 0.000 0.787
456 Migori Mn 182.830 176.837 0.787
460 Migori Fe 169.676 174.534 0.787
485 Rachuonyo chickens 9.000 7.622 0.787
495 Rachuonyo legum_maincrop_5 0.119 0.067 0.787
497 Rachuonyo C.E.C 13.759 13.063 0.787
517 Rachuonyo hilltop 0.036 0.011 0.787
538 Rongo dap.acre 46.444 53.115 0.787
557 Rongo Na 36.941 35.865 0.787
562 Rongo Total.C 2.304 2.221 0.787
581 Suneka chickens 12.529 5.829 0.787
583 Suneka sheep 0.000 0.029 0.787
589 Suneka lime_fertilizer_5 0.059 0.000 0.787
613 Suneka hilltop 0.029 0.000 0.787
621 Suneka own.sheep 0.000 0.029 0.787
625 Webuye female 0.738 0.833 0.787
630 Webuye pigs 0.000 0.024 0.787
633 Webuye yield 68.143 62.905 0.787
638 Webuye fertilizer_5 0.119 0.000 0.787
640 Webuye legum_intercrop_5 3.167 3.619 0.787
647 Webuye Mg 145.766 162.641 0.787
663 Webuye crop.maize 1.000 0.976 0.787
668 Webuye own.pigs 0.000 0.024 0.787
576 Rongo sandy_soil 0.564 0.641 0.791
641 Webuye C.E.C 9.007 9.813 0.793
518 Rachuonyo hillside 0.405 0.333 0.793
305 Kisii C.E.C 13.942 14.492 0.804
477 Migori own.sheep 0.209 0.153 0.804
657 Webuye Zn 3.855 4.111 0.804
659 Webuye Total.N 0.161 0.154 0.804
313 Kisii pH 5.764 5.842 0.805
454 Migori K 215.241 207.129 0.81
40 Busia alt 1198.566 1225.322 0.811
65 Butere C.E.C 7.720 8.072 0.811
118 Chwele K 103.957 113.435 0.811
168 Gem Mn 166.868 171.951 0.811
296 Kisii field_size 0.343 0.309 0.811
344 Lugari field_size 0.688 0.753 0.811
442 Migori dap.acre 54.533 61.589 0.811
449 Migori C.E.C 15.104 14.547 0.811
473 Migori own.cows 0.791 0.729 0.811
512 Rachuonyo S 12.912 13.322 0.811
550 Rongo K 182.358 171.577 0.811
4 Busia goats 0.535 0.333 0.812
18 Busia Cu 4.016 3.715 0.812
19 Busia EC 47.956 45.685 0.812
41 Busia own.cows 0.651 0.556 0.812
92 Butere own.pigs 0.034 0.059 0.812
110 Chwele fertilizer_5 0.225 0.077 0.812
190 Gem clay_soil 0.179 0.222 0.812
203 Kakamega (South) chemical_fertilizer_5 4.527 4.348 0.812
274 Kakamega B (North) Total.C 2.248 2.182 0.812
309 Kisii Hp 0.478 0.413 0.812
536 Rongo field_size 0.484 0.452 0.812
543 Rongo legum_maincrop_5 0.359 0.462 0.812
597 Suneka Hp 0.428 0.478 0.812
606 Suneka P 9.322 7.719 0.812
578 Suneka age 46.235 43.114 0.815
466 Migori Total.C 2.637 2.698 0.828
545 Rongo C.E.C 12.273 11.640 0.828
644 Webuye Exch.Al 0.088 0.102 0.828
25 Busia pH 5.906 5.997 0.83
431 Matete loam_soil 0.891 0.830 0.83
432 Matete sandy_soil 0.109 0.170 0.83
627 Webuye cows 2.119 1.786 0.83
254 Kakamega B (North) fertilizer_5 0.160 0.071 0.84
406 Matete K 114.570 122.255 0.846
634 Webuye dap.acre 53.223 47.324 0.854
76 Butere Fe 141.381 144.766 0.855
132 Chwele valley 0.900 0.949 0.855
366 Lugari P 11.787 10.411 0.855
645 Webuye Hp 0.291 0.269 0.855
212 Kakamega (South) Exch.Al 0.176 0.151 0.858
631 Webuye sheep 0.095 0.190 0.859
15 Busia legum_maincrop_5 0.023 0.067 0.86
122 Chwele B 0.134 0.150 0.86
202 Kakamega (South) dap.acre 51.854 55.610 0.86
417 Matete Zn 4.093 4.256 0.86
491 Rachuonyo chemical_fertilizer_5 2.702 2.889 0.86
522 Rachuonyo own.goats 0.321 0.267 0.86
523 Rachuonyo own.chickens 0.845 0.800 0.86
539 Rongo chemical_fertilizer_5 2.949 3.179 0.86
573 Rongo own.sheep 0.179 0.231 0.86
618 Suneka own.goats 0.147 0.086 0.86
68 Butere Exch.Al 0.228 0.196 0.862
119 Chwele Mg 107.356 117.334 0.863
223 Kakamega (South) PSI 145.493 140.141 0.863
271 Kakamega B (North) PSI 125.869 121.524 0.863
401 Matete C.E.C 8.499 9.037 0.863
418 Matete Total.C 2.101 2.020 0.863
537 Rongo yield 48.329 45.039 0.863
563 Rongo Total.N 0.151 0.147 0.863
658 Webuye Total.C 2.106 2.015 0.863
471 Migori crop.maize 0.907 0.871 0.864
20 Busia Exch.Al 0.124 0.104 0.865
526 Rachuonyo clay_soil 0.024 0.044 0.865
152 Gem field_size 0.474 0.447 0.867
494 Rachuonyo fertilizer_5 0.417 0.311 0.867
500 Rachuonyo Exch.Al 0.095 0.106 0.867
49 Butere female 0.733 0.689 0.867
32 Busia S 14.130 13.689 0.873
124 Chwele Fe 135.330 130.891 0.874
327 Kisii crop.maize 0.851 0.900 0.876
574 Rongo clay_soil 0.064 0.038 0.876
21 Busia Hp 0.330 0.307 0.878
74 Butere B 0.102 0.107 0.878
101 Chwele chickens 10.025 8.385 0.878
181 Gem hilltop 0.022 0.037 0.878
182 Gem hillside 0.358 0.319 0.878
187 Gem own.chickens 0.940 0.919 0.878
191 Gem loam_soil 0.799 0.763 0.878
207 Kakamega (South) legum_maincrop_5 0.356 0.247 0.878
276 Kakamega B (North) valley 0.829 0.869 0.878
286 Kakamega B (North) clay_soil 0.073 0.048 0.878
292 Kisii goats 0.702 0.540 0.878
413 Matete Na 29.027 29.532 0.878
427 Matete own.chickens 0.935 0.894 0.878
501 Rachuonyo Hp 0.290 0.307 0.878
553 Rongo pH 5.899 5.856 0.878
568 Rongo alt 1283.056 1232.582 0.878
571 Rongo own.chickens 0.885 0.846 0.878
177 Gem Zn 4.140 4.232 0.879
120 Chwele Mn 92.440 96.506 0.879
450 Migori Cu 4.040 3.945 0.879
234 Kakamega (South) own.goats 0.143 0.180 0.88
575 Rongo loam_soil 0.372 0.321 0.88
255 Kakamega B (North) legum_maincrop_5 0.622 0.488 0.88
312 Kisii Mn 221.671 215.785 0.88
594 Suneka Cu 4.218 4.054 0.88
407 Matete Mg 133.007 142.882 0.89
176 Gem S 15.437 15.655 0.891
244 Kakamega B (North) goats 0.500 0.405 0.891
310 Kisii K 212.583 218.497 0.891
67 Butere EC 40.878 41.683 0.892
143 Chwele loam_soil 0.650 0.718 0.892
237 Kakamega (South) own.sheep 0.055 0.079 0.892
330 Kisii own.goats 0.319 0.260 0.892
502 Rachuonyo K 191.338 185.835 0.892
455 Migori Mg 291.552 283.010 0.893
559 Rongo PSI 92.332 95.860 0.893
664 Webuye alt 1540.877 1568.134 0.893
251 Kakamega B (North) chemical_fertilizer_5 4.244 4.096 0.893
69 Butere Hp 0.445 0.426 0.9
389 Matete chickens 13.674 11.894 0.9
411 Matete Ca 809.328 858.827 0.9
227 Kakamega (South) Total.N 0.156 0.160 0.9
96 Butere sandy_soil 0.190 0.160 0.904
373 Lugari hilltop 0.011 0.023 0.906
384 Lugari sandy_soil 0.011 0.023 0.906
404 Matete Exch.Al 0.119 0.109 0.908
125 Chwele Na 30.915 31.522 0.91
264 Kakamega B (North) Mn 130.041 133.470 0.913
577 Suneka female 0.529 0.600 0.913
184 Gem alt 1193.128 1166.167 0.915
365 Lugari Na 28.192 27.857 0.915
415 Matete PSI 111.212 105.877 0.915
436 Migori goats 1.070 0.906 0.915
530 Rongo age 42.590 41.397 0.915
585 Suneka yield 50.529 47.086 0.915
590 Suneka fertilizer_5 0.147 0.086 0.915
363 Lugari Ca 946.378 988.467 0.916
84 Butere valley 0.991 0.983 0.917
112 Chwele legum_intercrop_5 3.175 2.923 0.917
410 Matete B 0.144 0.151 0.917
451 Migori EC 57.572 56.792 0.917
461 Migori Na 37.542 37.136 0.917
157 Gem lime_fertilizer_5 0.045 0.022 0.923
6 Busia pigs 1.047 0.844 0.926
55 Butere sheep 0.371 0.487 0.926
81 Butere Zn 4.290 4.364 0.926
114 Chwele Cu 1.812 1.875 0.926
290 Kisii age 45.809 44.420 0.926
416 Matete S 11.442 11.164 0.926
520 Rachuonyo alt 1359.050 1311.449 0.926
540 Rongo compost_fertilizer_5 0.692 0.821 0.926
62 Butere fertilizer_5 0.095 0.134 0.927
66 Butere Cu 2.609 2.530 0.927
131 Chwele Total.N 0.103 0.099 0.927
278 Kakamega B (North) hillside 0.159 0.131 0.927
287 Kakamega B (North) loam_soil 0.841 0.869 0.927
345 Lugari yield 58.636 60.395 0.927
394 Matete dap.acre 48.412 52.225 0.927
414 Matete P 12.972 11.862 0.927
472 Migori alt 1275.836 1310.417 0.927
505 Rachuonyo pH 6.182 6.144 0.927
588 Suneka compost_fertilizer_5 0.824 1.029 0.927
135 Chwele crop.maize 0.950 0.923 0.938
141 Chwele own.sheep 0.050 0.077 0.938
308 Kisii Exch.Al 0.161 0.139 0.938
624 Suneka sandy_soil 0.088 0.057 0.938
162 Gem Cu 3.353 3.407 0.938
295 Kisii sheep 0.043 0.020 0.938
395 Matete chemical_fertilizer_5 4.239 4.106 0.938
467 Migori Total.N 0.177 0.179 0.944
192 Gem sandy_soil 0.022 0.015 0.949
402 Matete Cu 2.336 2.404 0.949
629 Webuye chickens 11.524 9.548 0.949
447 Migori legum_maincrop_5 0.140 0.188 0.95
648 Webuye Mn 131.760 128.340 0.95
372 Lugari valley 0.807 0.779 0.952
30 Busia P 8.264 7.408 0.955
474 Migori own.goats 0.326 0.294 0.955
134 Chwele hillside 0.075 0.051 0.958
144 Chwele sandy_soil 0.325 0.282 0.958
180 Gem valley 0.619 0.644 0.958
280 Kakamega B (North) alt 1012.737 1054.354 0.958
322 Kisii Total.C 2.845 2.877 0.958
324 Kisii valley 0.277 0.240 0.958
326 Kisii hillside 0.723 0.760 0.958
337 Lugari female 0.773 0.744 0.958
353 Lugari C.E.C 9.406 9.648 0.958
390 Matete pigs 0.283 0.191 0.958
392 Matete field_size 0.609 0.652 0.958
421 Matete hilltop 0.087 0.064 0.958
452 Migori Exch.Al 0.067 0.071 0.958
457 Migori pH 6.186 6.212 0.958
519 Rachuonyo crop.maize 0.929 0.944 0.958
548 Rongo Exch.Al 0.114 0.108 0.958
636 Webuye compost_fertilizer_5 1.810 2.000 0.958
31 Busia PSI 148.837 144.400 0.959
130 Chwele Total.C 1.534 1.496 0.959
328 Kisii alt 1530.459 1571.339 0.959
340 Lugari goats 0.091 0.128 0.959
511 Rachuonyo PSI 141.108 145.216 0.959
542 Rongo fertilizer_5 0.208 0.169 0.959
596 Suneka Exch.Al 0.166 0.183 0.959
623 Suneka loam_soil 0.853 0.886 0.959
669 Webuye own.sheep 0.071 0.095 0.959
348 Lugari compost_fertilizer_5 1.598 1.482 0.961
316 Kisii Fe 129.242 127.516 0.961
318 Kisii P 2.809 2.614 0.963
580 Suneka goats 0.235 0.343 0.963
165 Gem Hp 0.410 0.400 0.967
23 Busia Mg 176.760 183.696 0.967
175 Gem PSI 128.096 130.272 0.967
260 Kakamega B (North) Exch.Al 0.107 0.100 0.967
299 Kisii chemical_fertilizer_5 2.298 2.120 0.967
300 Kisii compost_fertilizer_5 1.255 1.120 0.967
412 Matete Fe 126.753 128.220 0.967
423 Matete crop.maize 0.935 0.915 0.967
544 Rongo legum_intercrop_5 1.538 1.449 0.967
552 Rongo Mn 150.975 154.477 0.967
617 Suneka own.cows 0.706 0.743 0.978
8 Busia field_size 0.703 0.733 0.981
10 Busia dap.acre 37.975 40.215 0.981
14 Busia fertilizer_5 0.302 0.356 0.981
17 Busia C.E.C 10.515 10.782 0.981
29 Busia Na 29.657 29.921 0.981
36 Busia valley 0.907 0.889 0.981
50 Butere age 44.595 44.008 0.981
54 Butere pigs 0.086 0.101 0.981
80 Butere S 14.108 13.954 0.981
88 Butere alt 1273.988 1263.968 0.981
94 Butere clay_soil 0.190 0.202 0.981
95 Butere loam_soil 0.621 0.639 0.981
103 Chwele sheep 0.125 0.103 0.981
105 Chwele yield 61.175 59.231 0.981
106 Chwele dap.acre 45.398 47.258 0.981
109 Chwele lime_fertilizer_5 0.175 0.128 0.981
116 Chwele Exch.Al 0.076 0.070 0.981
164 Gem Exch.Al 0.150 0.145 0.981
166 Gem K 104.789 103.416 0.981
169 Gem pH 5.802 5.791 0.981
171 Gem Ca 785.984 795.618 0.981
186 Gem own.goats 0.224 0.237 0.981
196 Kakamega (South) goats 0.330 0.303 0.981
199 Kakamega (South) sheep 0.154 0.191 0.981
204 Kakamega (South) compost_fertilizer_5 3.088 3.157 0.981
206 Kakamega (South) fertilizer_5 0.044 0.034 0.981
228 Kakamega (South) valley 0.648 0.663 0.981
230 Kakamega (South) hillside 0.308 0.292 0.981
232 Kakamega (South) alt 1411.321 1399.184 0.981
248 Kakamega B (North) field_size 0.671 0.690 0.981
250 Kakamega B (North) dap.acre 47.820 48.786 0.981
283 Kakamega B (North) own.chickens 0.939 0.929 0.981
289 Kisii female 0.745 0.720 0.981
306 Kisii Cu 5.101 5.146 0.981
315 Kisii Ca 1375.107 1398.179 0.981
317 Kisii Na 33.998 33.832 0.981
321 Kisii Zn 5.301 5.373 0.981
329 Kisii own.cows 0.745 0.720 0.981
351 Lugari legum_maincrop_5 0.057 0.071 0.981
361 Lugari pH 5.787 5.774 0.981
367 Lugari PSI 128.062 126.435 0.981
371 Lugari Total.N 0.160 0.159 0.981
374 Lugari hillside 0.182 0.198 0.981
396 Matete compost_fertilizer_5 1.783 1.915 0.981
405 Matete Hp 0.335 0.327 0.981
408 Matete Mn 126.124 127.453 0.981
409 Matete pH 5.741 5.765 0.981
440 Migori field_size 0.471 0.458 0.981
453 Migori Hp 0.257 0.262 0.981
459 Migori Ca 1692.366 1723.831 0.981
462 Migori P 19.144 19.842 0.981
465 Migori Zn 6.479 6.544 0.981
469 Migori hilltop 0.081 0.071 0.981
470 Migori hillside 0.442 0.459 0.981
506 Rachuonyo B 0.179 0.182 0.981
514 Rachuonyo Total.C 2.290 2.273 0.981
525 Rachuonyo own.sheep 0.440 0.422 0.981
527 Rachuonyo loam_soil 0.774 0.756 0.981
546 Rongo Cu 3.522 3.474 0.981
551 Rongo Mg 217.827 213.987 0.981
564 Rongo valley 0.833 0.821 0.981
566 Rongo hillside 0.167 0.179 0.981
607 Suneka PSI 144.626 142.453 0.981
614 Suneka hillside 0.206 0.229 0.981
616 Suneka alt 1368.148 1338.315 0.981
619 Suneka own.chickens 0.794 0.771 0.981
665 Webuye own.cows 0.762 0.738 0.981
671 Webuye loam_soil 0.357 0.333 0.981
672 Webuye sandy_soil 0.643 0.667 0.981
44 Busia own.pigs 0.442 0.422 0.985
129 Chwele Zn 3.531 3.571 0.985
347 Lugari chemical_fertilizer_5 4.425 4.388 0.985
480 Migori sandy_soil 0.151 0.141 0.985
535 Rongo sheep 0.513 0.551 0.985
567 Rongo crop.maize 0.756 0.769 0.985
507 Rachuonyo Ca 1487.987 1507.098 0.985
304 Kisii legum_intercrop_5 1.957 1.880 0.994
3 Busia cows 1.512 1.578 0.994
356 Lugari Exch.Al 0.096 0.099 0.995
79 Butere PSI 113.234 112.391 0.996
167 Gem Mg 138.339 139.362 1
136 Chwele alt 955.358 937.070 1
376 Lugari alt 1077.900 1095.638 1
499 Rachuonyo EC 52.448 52.215 1
642 Webuye Cu 2.726 2.699 1
7 Busia sheep 0.093 0.089 1
12 Busia compost_fertilizer_5 1.372 1.378 1
22 Busia K 130.795 132.082 1
24 Busia Mn 174.610 175.526 1
26 Busia B 0.159 0.160 1
38 Busia hillside 0.093 0.089 1
45 Busia own.sheep 0.070 0.067 1
46 Busia clay_soil 0.023 0.022 1
47 Busia loam_soil 0.884 0.889 1
48 Busia sandy_soil 0.093 0.089 1
56 Butere field_size 0.536 0.530 1
64 Butere legum_intercrop_5 2.629 2.622 1
72 Butere Mn 135.295 136.073 1
93 Butere own.sheep 0.138 0.134 1
97 Chwele female 0.725 0.718 1
102 Chwele pigs 0.425 0.436 1
104 Chwele field_size 0.575 0.574 1
111 Chwele legum_maincrop_5 0.050 0.051 1
126 Chwele P 25.547 25.175 1
138 Chwele own.goats 0.225 0.231 1
140 Chwele own.pigs 0.125 0.128 1
148 Gem goats 0.545 0.541 1
154 Gem dap.acre 53.324 53.624 1
156 Gem compost_fertilizer_5 2.664 2.652 1
159 Gem legum_maincrop_5 0.261 0.259 1
161 Gem C.E.C 8.234 8.204 1
170 Gem B 0.110 0.110 1
172 Gem Fe 130.034 130.058 1
174 Gem P 5.694 5.604 1
220 Kakamega (South) Fe 131.662 131.618 1
229 Kakamega (South) hilltop 0.044 0.045 1
239 Kakamega (South) loam_soil 0.648 0.652 1
240 Kakamega (South) sandy_soil 0.352 0.348 1
279 Kakamega B (North) crop.maize 0.976 0.976 1
288 Kakamega B (North) sandy_soil 0.085 0.083 1
303 Kisii legum_maincrop_5 0.319 0.300 1
319 Kisii PSI 210.692 211.678 1
333 Kisii own.sheep 0.021 0.020 1
335 Kisii loam_soil 0.851 0.860 1
336 Kisii sandy_soil 0.149 0.140 1
354 Lugari Cu 2.296 2.306 1
357 Lugari Hp 0.307 0.309 1
359 Lugari Mg 159.265 160.031 1
375 Lugari crop.maize 0.989 0.988 1
378 Lugari own.goats 0.045 0.047 1
386 Matete age 43.870 44.128 1
387 Matete cows 2.217 2.191 1
420 Matete valley 0.500 0.511 1
422 Matete hillside 0.413 0.426 1
425 Matete own.cows 0.761 0.766 1
439 Migori sheep 0.593 0.588 1
463 Migori PSI 110.488 110.625 1
468 Migori valley 0.477 0.471 1
478 Migori clay_soil 0.116 0.118 1
479 Migori loam_soil 0.733 0.741 1
487 Rachuonyo sheep 1.095 1.067 1
490 Rachuonyo dap.acre 73.343 73.779 1
515 Rachuonyo Total.N 0.173 0.173 1
528 Rachuonyo sandy_soil 0.202 0.200 1
529 Rongo female 0.667 0.667 1
549 Rongo Hp 0.348 0.347 1
555 Rongo Ca 1177.134 1182.208 1
556 Rongo Fe 169.246 168.722 1
579 Suneka cows 1.471 1.429 1
591 Suneka legum_maincrop_5 0.088 0.086 1
600 Suneka Mn 188.015 188.691 1
604 Suneka Fe 146.640 146.812 1
612 Suneka valley 0.765 0.771 1
615 Suneka crop.maize 0.941 0.943 1
622 Suneka clay_soil 0.059 0.057 1
626 Webuye age 45.024 45.167 1
639 Webuye legum_maincrop_5 0.024 0.024 1
652 Webuye Fe 116.392 116.915 1
660 Webuye valley 0.952 0.952 1
662 Webuye hillside 0.048 0.048 1
667 Webuye own.chickens 0.905 0.905 1
13 Busia lime_fertilizer_5 0.000 0.000 NA
238 Kakamega (South) clay_soil 0.000 0.000 NA
294 Kisii pigs 0.000 0.000 NA
301 Kisii lime_fertilizer_5 0.000 0.000 NA
325 Kisii hilltop 0.000 0.000 NA
332 Kisii own.pigs 0.000 0.000 NA
334 Kisii clay_soil 0.000 0.000 NA
430 Matete clay_soil 0.000 0.000 NA
438 Migori pigs 0.000 0.000 NA
476 Migori own.pigs 0.000 0.000 NA
486 Rachuonyo pigs 0.000 0.000 NA
524 Rachuonyo own.pigs 0.000 0.000 NA
534 Rongo pigs 0.000 0.000 NA
541 Rongo lime_fertilizer_5 0.000 0.000 NA
565 Rongo hilltop 0.000 0.000 NA
572 Rongo own.pigs 0.000 0.000 NA
582 Suneka pigs 0.000 0.000 NA
620 Suneka own.pigs 0.000 0.000 NA
637 Webuye lime_fertilizer_5 0.000 0.000 NA
661 Webuye hilltop 0.000 0.000 NA
670 Webuye clay_soil 0.000 0.000 NA

Demographic variables:

Agricultural practice variables:

Soil Variables:

write.csv(dist.output, file=paste(od, "ke district balance.csv", sep="/"), row.names=T)

Agronomic Behaviors

suppressMessages(library(stargazer))

d[,grep("_5", names(d))] <- sapply(d[,grep("_5", names(d))], function(x){
  ifelse(x==-99, NA, x)
})

apply(d[,grep("_5", names(d))],2, table)

$chemical_fertilizer_5

0 1 2 3 4 5 296 149 179 207 93 1106

$compost_fertilizer_5

0 1 2 3 4 5 988 176 174 116 55 523

$lime_fertilizer_5

0 1 2 3 5 2007 7 12 1 5

$fertilizer_5

0 1 2 3 4 5 1841 90 39 23 7 29

$legum_maincrop_5

0 1 2 3 4 5 1771 142 56 24 14 25

$legum_intercrop_5

0 1 2 3 4 5 507 174 244 189 150 768

Plot the variables as they are. Should we consider a log transform as we used for the Rwanda variables?

for(i in names(d)[grep("_5", names(d))]) {
  print(
    ggplot(d, aes(x=d[,i])) + geom_histogram(binwidth=1) +
      labs(x = paste("Past five seasons of ", i, sep = ""))
  )
}
## Warning: Removed 5 rows containing non-finite values (stat_bin).

## Warning: Removed 3 rows containing non-finite values (stat_bin).

## Warning: Removed 3 rows containing non-finite values (stat_bin).

## Warning: Removed 6 rows containing non-finite values (stat_bin).

## Warning: Removed 3 rows containing non-finite values (stat_bin).

## Warning: Removed 3 rows containing non-finite values (stat_bin).

d$logFert <- log(d$chemical_fertilizer_5+1)
d$logCompost <- log(d$compost_fertilizer_5+1)
d$logLime <- log(d$lime_fertilizer_5+1)
d$logLegmain <- log(d$legum_maincrop_5+1)
#d$logLegint <- log(d$legum_intercrop_5+1)

logVars <- paste(names(d[grep("log", names(d))]), collapse=" + ")
cor(d[,grep("log", names(d))], use = "complete.obs")
##               logFert logCompost     logLime  logLegmain
## logFert    1.00000000 0.12629624  0.03840900  0.08136505
## logCompost 0.12629624 1.00000000  0.03135643  0.05436087
## logLime    0.03840900 0.03135643  1.00000000 -0.02479574
## logLegmain 0.08136505 0.05436087 -0.02479574  1.00000000

Baseline balance by OAF tenure

Look at farmers by duration of tenure farming with 1AF We want to understand, at least with an initial naive baseline sense, what is the cumulative effect of Tubura practices on soil health outcomes?

We will look only at current 1AF farmers and compare first year farmers to farmers with more experience with Tubura.

oafOnly <- d[which(d$oaf==1 & d$seasons_oaf>=1),]
for(i in 1:length(soilVars)){
print(
  ggplot(oafOnly, aes(x=as.factor(seasons_oaf), y=oafOnly[,soilVars[i]])) + 
    geom_boxplot() + 
    labs(x="OAF Tenure", y=soilVars[i], title = paste("KE baseline soil by tenure - ", soilVars[i], sep = ""))
  )  
}

Tenure summaries

tenureSum <- aggregate(oafOnly[,out.list], by=list(oafOnly$seasons_oaf), function(x){
  round(mean(x, na.rm=T),2)
})
tenureSum <- as.data.frame(t(tenureSum))
colnames(tenureSum) <- c(paste(seq(1,8,1), " seas.", sep=""))
print(kable(tenureSum))
1 seas. 2 seas. 3 seas. 4 seas. 5 seas. 6 seas. 7 seas. 8 seas.
Group.1 1.00 2.00 3.00 4.00 5.00 6.00 7.00 8.00
female 0.71 0.69 0.77 0.67 0.68 0.79 0.60 0.67
age 43.13 45.55 47.37 51.85 51.76 51.34 49.20 58.67
cows 1.82 1.96 2.17 2.87 2.70 2.76 1.60 4.83
goats 0.58 0.45 0.42 0.86 0.71 0.34 0.20 0.33
chickens 11.06 10.73 10.26 12.06 15.95 11.41 8.00 21.00
pigs 0.17 0.21 0.14 0.23 0.21 0.69 0.20 0.83
sheep 0.46 0.52 0.43 0.44 0.54 0.17 0.40 0.17
field_size 0.50 0.54 0.55 0.65 0.53 0.65 0.45 0.62
yield 48.79 62.83 63.74 65.09 63.08 59.62 54.80 51.50
dap.acre 56.65 50.06 50.75 45.26 44.01 42.35 62.93 53.78
can.acre 49.77 42.67 43.78 40.03 41.40 35.16 57.00 40.75
npk.acre 21.01 68.00 20.00 NaN NaN NaN NaN NaN
urea.acre 42.07 52.00 26.67 24.00 NaN 4.00 NaN NaN
chemical_fertilizer_5 3.04 3.48 3.82 4.35 4.37 4.41 4.20 5.00
compost_fertilizer_5 1.61 1.75 1.73 2.12 2.86 2.62 1.20 1.83
lime_fertilizer_5 0.02 0.06 0.02 0.09 0.08 0.00 0.00 0.00
fertilizer_5 0.22 0.25 0.21 0.28 0.16 0.24 0.00 0.00
legum_maincrop_5 0.20 0.17 0.28 0.55 0.57 0.59 0.00 0.00
legum_intercrop_5 2.54 2.51 2.53 2.69 2.89 3.28 2.40 2.17
C.E.C 10.30 10.31 10.58 10.14 9.66 9.54 6.61 9.95
Cu 3.21 3.30 3.24 3.23 3.65 3.54 1.90 3.23
EC 47.98 46.63 47.44 46.19 44.33 41.66 43.20 43.46
Exch.Al 0.13 0.14 0.13 0.11 0.18 0.09 0.14 0.10
Hp 0.37 0.35 0.35 0.34 0.47 0.33 0.37 0.34
K 142.63 141.07 143.31 136.61 131.29 121.91 90.35 130.13
Mg 179.27 182.43 182.20 174.70 168.92 168.02 92.00 159.78
Mn 153.35 161.79 157.42 156.02 169.54 179.08 103.36 152.35
pH 5.82 5.87 5.86 5.87 5.70 5.95 5.69 5.82
B 0.15 0.15 0.16 0.15 0.14 0.12 0.10 0.14
Ca 1009.45 1043.20 1054.48 1045.94 905.38 1020.28 618.14 913.43
Fe 139.24 137.46 132.84 135.61 132.93 144.18 138.20 132.92
Na 31.79 31.54 31.10 30.99 30.57 31.21 29.75 29.81
P 14.49 12.50 10.15 10.50 6.40 6.52 24.05 9.50
PSI 122.41 125.61 130.29 126.75 148.03 123.62 85.38 129.65
S 12.80 13.04 12.88 13.39 14.68 15.45 10.93 13.20
Zn 4.79 4.80 4.67 4.68 4.62 4.91 3.61 4.35
Total.C 2.18 2.15 2.21 2.15 2.16 2.07 1.66 2.10
Total.N 0.16 0.16 0.16 0.16 0.17 0.15 0.12 0.16
valley 0.72 0.72 0.74 0.72 0.68 0.72 0.80 0.67
hilltop 0.03 0.02 0.02 0.03 0.03 0.03 0.00 0.00
hillside 0.25 0.26 0.24 0.24 0.29 0.24 0.20 0.33
crop.maize 0.87 0.93 0.97 0.95 0.98 0.97 1.00 1.00
alt 1265.77 1303.87 1158.44 1222.10 1353.65 1324.20 1499.11 1464.94
own.cows 0.68 0.75 0.81 0.88 0.86 0.90 0.80 1.00
own.goats 0.22 0.22 0.20 0.33 0.32 0.28 0.20 0.33
own.chickens 0.89 0.93 0.90 0.92 0.92 0.86 1.00 1.00
own.pigs 0.05 0.08 0.07 0.09 0.10 0.28 0.20 0.33
own.sheep 0.17 0.17 0.20 0.16 0.14 0.10 0.20 0.17
clay_soil 0.09 0.05 0.07 0.08 0.08 0.10 0.00 0.17
loam_soil 0.71 0.78 0.73 0.78 0.73 0.55 0.40 0.50
sandy_soil 0.21 0.17 0.20 0.14 0.19 0.34 0.60 0.33

OAF tenure balance table

oafOnly$tenured <- ifelse(oafOnly$seasons_oaf>=3,0,1)

toMatch <- c("npk", "urea", "g_intercrop")
tenureList <- out.list[!out.list %in% unique(grep(paste(toMatch, collapse="|"), out.list, value=TRUE))]


tenure <- do.call(rbind, lapply(tenureList, function(x) {
  
  out <- t.test(oafOnly[,x] ~ oafOnly[,"tenured"], data=oafOnly)
  tab <- data.frame(out[[5]][[2]],out[[5]][[1]], out[3])
  tab[,1:2] <- round(tab[,1:2],3)
  names(tab) <- c(names(out[[5]]), "pvalue")
  return(tab)
}))

# use p.adjust with bonferroni correction
tenure$pvalue <- p.adjust(tenure$pvalue, method="fdr")
rownames(tenure) <- tenureList
tenure <- tenure[order(tenure$pvalue),]
tenure$pvalue <- ifelse(tenure[, 3] < 0.001, "< 0.001", round(tenure[, 3], 3)) 
colnames(tenure) <- c("Tenured (3+)","New (< 3)", "p-value")    
print(kable(tenure))
Tenured (3+) New (< 3) p-value
chemical_fertilizer_5 3.217 4.141 < 0.001
age 44.094 49.980 < 0.001
yield 54.379 63.318 < 0.001
own.cows 0.711 0.848 < 0.001
cows 1.877 2.549 < 0.001
crop.maize 0.898 0.966 < 0.001
P 13.700 9.457 < 0.001
legum_maincrop_5 0.189 0.425 0.002
dap.acre 53.582 47.490 0.005
field_size 0.517 0.585 0.007
compost_fertilizer_5 1.665 2.112 0.007
S 12.892 13.540 0.01
own.pigs 0.059 0.103 0.065
can.acre 45.950 41.428 0.07
Na 31.688 30.939 0.08
PSI 123.685 131.293 0.113
EC 47.440 45.912 0.142
Fe 138.535 134.661 0.194
own.goats 0.216 0.264 0.238
K 142.012 136.476 0.425
Zn 4.793 4.661 0.425
Total.N 0.157 0.160 0.425
alt 1280.940 1235.703 0.427
legum_intercrop_5 2.531 2.695 0.45
chickens 10.931 12.046 0.552
Mn 156.708 160.161 0.624
Mg 180.529 174.821 0.625
female 0.698 0.724 0.658
Cu 3.242 3.315 0.711
goats 0.532 0.586 0.726
pigs 0.184 0.236 0.726
sheep 0.485 0.425 0.726
lime_fertilizer_5 0.032 0.049 0.726
Exch.Al 0.135 0.127 0.726
loam_soil 0.738 0.718 0.726
C.E.C 10.299 10.132 0.753
B 0.148 0.145 0.795
Hp 0.360 0.368 0.807
sandy_soil 0.191 0.204 0.807
clay_soil 0.071 0.078 0.866
fertilizer_5 0.231 0.216 0.9
Ca 1022.886 1013.531 0.9
Total.C 2.171 2.162 0.9
valley 0.718 0.724 0.9
hillside 0.258 0.250 0.9
pH 5.843 5.840 0.94
hilltop 0.025 0.026 0.94
own.chickens 0.906 0.908 0.94
own.sheep 0.174 0.172 0.94

Demographic variables:

Agricultural practice variables:

Soil Variables:

Keep only soil variables with reliable r2 values for the regressions. Confirm this list with Emmanuel and confirm when we’ll get the C and N estimates

#soilVars <- c("C.E.C", "Cu", "EC", "Exch.Al", "Hp", "K", "Mg", "Mn",
#             "pH", "B", "Ca", "Fe", "Na", "P", "PSI", "S", "Zn")

soilVars <- c("Mg", "pH", "Ca", "Exch.Al", "C.E.C", "Total.C", "Total.N")
list1 <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~",  logVars, "+ as.factor(SiteName)", sep="")), data=d)
  return(mod)
})

Table 17 - years of input use

Guide to interpreting Linear-Log Models

plm.log <- function(x, range){
  
  beta = round(summary(x)$coefficients[range,1],3)
  beta.pval = summary(x)$coefficients[range,4]
  beta.conv = ifelse(beta.pval < 0.01, "***", ifelse(
    beta.pval < 0.05, "**", ifelse(
      beta.pval < 0.1, "*", "")))
  #beta.pval = round(beta.pval, 3)
  outcome = paste(beta, beta.conv, sep = "")
  outcome = c(outcome, unique(round(summary(x)$coefficients[1,1],3)))
  res = data.frame(outcome, stringsAsFactors = F)
  
  return(res)  
}

ke.table17 <- do.call(cbind, lapply(list1, function(x){
  plm.log(x, 2:5)
}))
colnames(ke.table17) <- soilVars
rownames(ke.table17) <- c(unlist(strsplit(gsub(" \\+ ", " ", logVars), " ")), "constant")
ke.table17 <- ke.table17[,c("pH","Total.C", "Total.N", "Ca", "Mg", "Exch.Al", "C.E.C")]

write.csv(ke.table17, file=paste(od, "ketable17.csv", sep="/"), row.names=T)
# stargazer for table 17
stargazer(list1, type="html", 
          title = "2016A Kenya Soil Health Baseline - Agronomic Practice Models (log)",
          covariate.labels = c("Seasons of Fertilizer", "Seasons of Compost", 
                               "Seasons of Lime", "Seasons of Legume (main)",
                               "Seasons of Legume (inter)"),
          dep.var.caption = "",
          dep.var.labels = "",
          column.labels = c(gsub("m3.","", soilVars)),
          notes = "Includes FE for site",
          omit=c("SiteName"), out=paste(od, "ke_baseline_agprac.htm",sep="/"))

Naive tenure models

stargazer(list2, type="html", 
          title = "2016A Kenya Soil Health Baseline - Naive Tenure Models",
          covariate.labels = c("OAF Tenure"),
          dep.var.caption = "",
          dep.var.labels = "",
          column.labels = c(gsub("m3.","", soilVars)),
          notes = "Includes FE for site",
          omit=c("SiteName"), out=paste(od, "ke_baseline_tenure.htm",sep="/"))
list3 <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~",  logVars, "+ seasons_oaf + as.factor(SiteName)", sep="")), data=d)
  return(mod)
})
stargazer(list3, type="html", 
          title = "2016A Kenya Soil Health Baseline - Ag Practice and Tenure",
          covariate.labels = c("Seasons of Fertilizer", "Seasons of Compost", 
                               "Seasons of Lime", "Seasons of Legume (main)",
                               "Seasons of Legume (inter)", "OAF Tenure"),
          dep.var.caption = "",
          dep.var.labels = "",
          column.labels = c(gsub("m3.","", soilVars)),
          notes = "Includes FE for site",
          omit=c("SiteName"), out=paste(od, "ke_baseline_ag_tenure.htm", sep="/"))

Table 18 - client product use and soil outcomes

First, convert all quantites to kg/acre and kg/ha. Then check for collinearity, set up and execute models

cor(d[,acreInputs[c(1:2,5)] ], use="complete.obs")
##                  dap.acre    can.acre compost.acre
## dap.acre      1.000000000  0.73288819 -0.007709517
## can.acre      0.732888188  1.00000000 -0.024655908
## compost.acre -0.007709517 -0.02465591  1.000000000

As with the Rwanda baseline soil health study, agricultural input quantity use is highly correlated.

plm.t16 <- function(x, range){
  
  beta = round(summary(x)$coefficients[range,1],3)
  beta.pval = round(summary(x)$coefficients[range,4],3)
  beta.conv = ifelse(beta.pval < 0.01, "***", ifelse(
    beta.pval < 0.05, "**", ifelse(
      beta.pval < 0.1, "*", "")))
  #beta.pval = round(beta.pval, 3)
  outcome = paste(beta, " (", beta.pval, ")", sep = "")
  outcome = c(outcome, unique(round(summary(x)$coefficients[1,1],3)))
  res = data.frame(outcome, stringsAsFactors = F)
  
  return(res)  
}
previousSeasonfert <- paste("dap.acre", "as.factor(SiteName)", sep=" + ")

list.t18 <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~", previousSeasonfert, sep="")), data=d)
  return(mod)
})

table18 <- do.call(cbind, lapply(list.t18, function(x){
  plm.t16(x, 2)
}))
colnames(table18) <- soilVars
rownames(table18) <- c("DAP (kg/acre)", "constant")

table18 <- table18[,c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")]
write.csv(table18, file=paste(od, "table18_dap.csv", sep = "/"),
          row.names = T)
previousSeasoncompost <- paste("compost.acre", "as.factor(SiteName)", sep=" + ")
list.t18b <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~", previousSeasoncompost, sep="")), data=d)
  return(mod)
})

table18b <- do.call(cbind, lapply(list.t18b, function(x){
  plm.t16(x, 2)
}))
colnames(table18b) <- soilVars
rownames(table18b) <- c("Compost (kg/acre)", "constant")

table18b <- table18b[,c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")]
write.csv(table18b, file=paste(od, "table18_compost.csv", sep = "/"),
          row.names = T)

The compost regression results are suprising. Look at Carbon and compost scatter plot

ggplot(d, aes(x = compost.acre, y=Total.C)) + geom_point() + 
  scale_x_continuous(limits=c(0,50)) + # remove larger values from the graph.
  geom_smooth(method='lm')
## Warning: Removed 668 rows containing non-finite values (stat_smooth).
## Warning: Removed 668 rows containing missing values (geom_point).

plm.t18 <- function(x, range){
  
  beta = round(summary(x)$coefficients[range,1],4)
  beta.pval = round(summary(x)$coefficients[range,4],3)
  beta.conv = ifelse(beta.pval < 0.01, "***", ifelse(
    beta.pval < 0.05, "**", ifelse(
      beta.pval < 0.1, "*", "")))
  #beta.pval = round(beta.pval, 3)
  outcome = paste(beta, " (", beta.pval, ")", sep = "")
  outcome = c(outcome, unique(round(summary(x)$coefficients[1,1],3)))
  res = data.frame(outcome, stringsAsFactors = F)
  
  return(res)  
}

previousSeasonCan <- paste("can.acre", "as.factor(SiteName)", sep=" + ")
list.t18c <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~", previousSeasonCan, sep="")), data=d)
  return(mod)
})

table18c <- do.call(cbind, lapply(list.t18c, function(x){
  plm.t18(x, 2)
}))
colnames(table18c) <- soilVars
rownames(table18c) <- c("CAN (kg/acre)", "constant")

table18c <- table18c[,c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")]
write.csv(table18c, file=paste(od, "table18_can.csv", sep = "/"),
          row.names = T)

District and site level summaries of soil and managment practices

dist.sum <- aggregate(d[,out.list], by=list(d$DistrictName), function(x){
  return(c(
    paste(
      round(mean(x, na.rm=T),3), " (", round(median(x, na.rm=T),3), ")", 
      " (", round(sd(x, na.rm=T),2), ")", sep = ""
    )
    ))
})
write.csv(dist.sum, file=paste(od, "district covariate summary.csv", sep = "/"))
site.sum <- aggregate(d[,out.list], by=list(d$SiteName), function(x){
  return(c(
    paste(
      round(mean(x, na.rm=T),3), " (", round(median(x, na.rm=T),3), ")", 
      " (", round(sd(x, na.rm=T),2), ")", sep = ""
    )
    ))
})
write.csv(site.sum, file=paste(od, "site covariate summary.csv", sep = "/"))

Truncated site level summary (site count > 10 = 25 sites)

largerSite <- table(d$SiteName)>10
largerSite <- rownames(as.matrix(largerSite[largerSite==T]))

site.sum.tr <- aggregate(d[d$SiteName %in% largerSite,out.list], by=list(d[d$SiteName %in% largerSite, "SiteName"]), function(x){
  return(c(
    paste(
      round(mean(x, na.rm=T),3), " (", round(median(x, na.rm=T),3), ")", 
      " (", round(sd(x, na.rm=T),2), ")", sep = ""
    )
    ))
})
write.csv(site.sum.tr, file=paste(od, "site truncated covariate summary.csv", sep = "/"))

Female farmers farming poorer soils?

toRemove <- "female"
genderBalance <- out.list[!out.list %in% toRemove]

equal <- do.call(rbind, lapply(genderBalance, function(x){
    
    out <- t.test(d[,x] ~ d[,"female"], data=d)
    tab <- data.frame(out[[5]][[2]],out[[5]][[1]], out[3])
    tab[,1:2] <- round(tab[,1:2],3)
    names(tab) <- c(names(out[[5]]), "pvalue")
    #tab[,3] <- p.adjust(tab[,3], method="holm")
    #tab[,3] <- ifelse(tab[,3] < 0.001, "< 0.001", round(tab[,3],3))
    #print(tab)
    return(tab)
  
}))

rownames(equal) <- NULL

# order variables 
equal$pvalue <- p.adjust(equal$pvalue, method="fdr")
rownames(equal) <- genderBalance
equal <- equal[order(equal$pvalue),]

equal$pvalue <- ifelse(equal$pvalue < 0.001, "< 0.001", round(equal$pvalue,3))
colnames(equal) <- c("Male Farmers","Female Farmers", "p-value")    
write.csv(equal, file=paste(od, "female farmer status.csv", sep = "/"), row.names=T)

Propensity Score Matching

We need to do a more rigorous job of accounting for differences between Tubura farmers and identified control farmers. Execute propensity score matching (PSM) to identify control farmers that overlap with Tubura farmers with regard to their likelihood of being a Tubura farmer.

psmVars <- paste(out.list <- c("female", "age", "cows", "goats", "chickens","pigs", "sheep", "SiteName"), collapse=" + ")

psmCheck <- unlist(strsplit(gsub("\\+", "", as.vector(psmVars))," ", fixed=TRUE))
psmCheck <- psmCheck[-which(psmCheck=="")]
naOut <- cbind(unlist(lapply(psmCheck, function(x){sum(is.na(d[,x]))})), psmCheck)

naOut
##          psmCheck  
## [1,] "0" "female"  
## [2,] "0" "age"     
## [3,] "0" "cows"    
## [4,] "0" "goats"   
## [5,] "0" "chickens"
## [6,] "2" "pigs"    
## [7,] "0" "sheep"   
## [8,] "0" "SiteName"

We have missing values that are preventing the prediction function that preceeds PSM from running properly. Let’s for now just remove yield as a conributing variable.

h <- d[complete.cases(d[,psmCheck]),]
psmVars <- paste(psmCheck, collapse=" + ")

reg <- glm(as.formula(paste("oaf ~", psmVars, sep="")),  family= binomial(link="logit"), data=h)

# summarize predicted probabilities
pr <- data.frame(pr_score = predict(reg, type='response'), treat = h$oaf)

# graph
psmGraph <- ggplot() + geom_histogram(data=subset(pr, pr$treat==1), aes(x = pr_score, y=..count.., fill=as.factor(treat)), bins=80, position = "identity") + geom_histogram(data=subset(pr, pr$treat==0), aes(x=pr_score, y=-..count.., fill=as.factor(treat)), bins=80, position = "identity") +
    scale_y_continuous(limits=c(-150,150)) + 
  labs(title ="PSM score overlap", x = "PSM score", y="Farmer count",
       fill="1AF/Control")

print(psmGraph)

pdf(file=paste(od, "ke_baseline_psm_overlap.pdf", sep="/"), height=8.5, width=11)
print(psmGraph)
dev.off()
## quartz_off_screen 
##                 2
d$age2 <- d$age^2

coreVars = c("female", "age", "SiteName", "cows", "goats", "chickens", "pigs", "sheep",
             "sandy_soil", "loam_soil")

psmList <- list(
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Ca"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Mg"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="pH"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Total.C"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Total.N"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="chemical_fertilizer_5"),
    list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Exch.Al"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="compost_fertilizer_5"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="legum_maincrop_5"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="legum_intercrop_5"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="logFert"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="dap.acre"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="can.acre"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="compost.acre")
)

#lime_fertilizer_5 >> so few farmers use it that we get a bad match.


# PSM
set.seed(20161102)
m <- lapply(psmList, function(listInput){

  naCheck <- unlist(strsplit(gsub("\\+", "", as.vector(listInput$psmVars))," ", fixed=TRUE))
  naCheck <- naCheck[-which(naCheck=="")]
  
  
  # keep complete cases of outcome variable
  k <- d[complete.cases(d[,naCheck]),]
  k <- k[complete.cases(k[,listInput$y]),]
  
  # run glm regression:
  reg <- glm(as.formula(paste(listInput$tr, "~", listInput$psmVars, sep="")),  family= binomial(link="logit"), data=k)
  
  suppressWarnings(
  mod <- Match(Y = k[,listInput$y], Tr = k[,listInput$tr], X = reg$fitted.values, ties=FALSE, replace=FALSE, caliper=0.25, estimand = "ATE")    
  )
  matchRes <- MatchBalance(k[,listInput$tr] ~ k[,listInput$y], match.out=mod, nboots=500, data=k, print.level = 0)
  #print(listInput$y)
  return(list(mod, matchRes))
  
})

The models can now vary by outcome. Let’s see if we can improve our results.

matchRes <- do.call(rbind, lapply(1:length(m), function(model){
  val <- as.data.frame(cbind(
    standard.diff=m[[model]][[2]]$AfterMatching[[1]]$sdiff, 
    var.ratio = m[[model]][[2]]$AfterMatching[[1]]$var.ratio,
    sdiff.adj = m[[model]][[2]]$AfterMatching[[1]]$sdiff/100))
  return(val)
}))

namesInput <- NULL
for(i in 1:length(psmList)){
  namesInput[i] <- psmList[[i]]$y
}
rownames(matchRes) <- namesInput
print(kable(matchRes))
standard.diff var.ratio sdiff.adj
Ca -13.6136287 0.6789672 -0.1361363
Mg -5.0967582 1.0804687 -0.0509676
pH -15.0434472 0.8554127 -0.1504345
Total.C 4.4842565 1.0177585 0.0448426
Total.N 0.8195454 1.0005472 0.0081955
chemical_fertilizer_5 4.5777306 0.9433016 0.0457773
Exch.Al 4.1576889 1.1818138 0.0415769
compost_fertilizer_5 -3.8350953 0.9332778 -0.0383510
legum_maincrop_5 4.5800790 1.2010159 0.0458008
legum_intercrop_5 -24.6327075 0.9618697 -0.2463271
logFert 6.0512109 0.9198516 0.0605121
dap.acre -11.7756456 0.6762797 -0.1177565
can.acre -30.4057248 0.5112521 -0.3040572
compost.acre 1.1631865 0.9475964 0.0116319
write.csv(matchRes, file=paste(od, "ke psm performance.csv", sep = "/"), row.names=T)

So few farmers use lime fertilizer that we don’t get a good match. I could make it binary but it’s so overwhelmingly the case that none of the farmers use lime that I’m just going to drop it.

We also don’t get super great matches on DAP and CAN. I’m going to push forward with this but I’m going to push forward with the results. We should flag this.

coefTable <- do.call(rbind, lapply(1:length(m), function(model){
  beta = round(m[[model]][[1]]$est.noadj,3)
  mean.Tr = round(m[[model]][[2]]$AfterMatching[[1]][[3]], 2)
  mean.Co = round(m[[model]][[2]]$AfterMatching[[1]][[4]], 2)
  pval = m[[model]][[2]]$AfterMatching[[1]][[10]][[3]] # p.value
  #pval = (1 - pnorm(abs(m[[model]][[1]]$est/m[[model]][[1]]$se.standard))) * 2
  pval = ifelse(pval < 0.001, "0.001", round(pval, 3))
  
  res = data.frame(beta, mean.Tr, mean.Co, pval, stringsAsFactors = F)
  return(res)
}))
row.names(coefTable) <- namesInput
coefTable$pval.adj <- round(p.adjust(coefTable$pval, method="fdr"),3)
print(kable(coefTable))
beta mean.Tr mean.Co pval pval.adj
Ca -76.899 1005.76 1082.66 0.001 0.004
Mg -4.711 174.93 179.64 0.143 0.286
pH -0.069 5.83 5.90 0.001 0.004
Total.C 0.025 2.16 2.13 0.201 0.291
Total.N 0.000 0.16 0.16 0.82 0.820
chemical_fertilizer_5 0.086 3.59 3.50 0.208 0.291
Exch.Al 0.007 0.13 0.12 0.234 0.298
compost_fertilizer_5 -0.081 1.87 1.95 0.28 0.327
legum_maincrop_5 0.038 0.27 0.23 0.168 0.291
legum_intercrop_5 -0.495 2.68 3.17 0.001 0.004
logFert 0.037 1.38 1.34 0.101 0.236
dap.acre -3.281 51.61 54.90 0.014 0.039
can.acre -6.971 43.33 50.30 0.001 0.004
compost.acre 22.763 279.59 256.82 0.753 0.811
write.csv(coefTable, file=paste(od, "ke psm coefficients.csv", sep = "/"),
          row.names = T)

# sort by the order Eric wants
t7order <- c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")
table7vars <- paste(t7order, collapse = "|")
table7 <- coefTable[grep(table7vars, rownames(coefTable)), ]
table7 <- table7[order(match(rownames(table7), t7order)),]

write.csv(table7, file=paste(od, "table7.csv", sep = "/"), row.names = T)

# 11/17 added lime
t8order <- c("chemical_fertilizer_5", "compost_fertilizer_5", "legum_maincrop_5",
             "legum_intercrop_5")
table8vars <- paste(t8order, collapse = "|")
table8 <- coefTable[grep(table8vars, rownames(coefTable)), ]
table8 <- table8[order(match(rownames(table8), t8order)),]

write.csv(table8, file=paste(od, "table8.csv", sep = "/"), row.names = T)

#table 3
t9order <- c("dap.acre", "can.acre", "compost.acre")
table9vars <- paste(t9order, collapse = "|")
table9 <- coefTable[grep(table9vars, rownames(coefTable)), ]
table9 <- table9[order(match(rownames(table9), t9order)),]
write.csv(table9, file=paste(od, "table9.csv", sep = "/"))

Break out first season clients from more tenured - for appendix?

This requires us to re-run the PSM matching process, confirm that our models are sound and then output the results ala table 2 again.

I’m comparing season 1 clients to clients with more tenure in the PSM matching.

d$tenure.tiers <- ifelse(d$oaf==1 & d$seasons_oaf==1, 0,
                         ifelse(d$oaf==1 & d$seasons_oaf>1, 1, NA))


coreVars = c("female", "age", "SiteName", "cows", "goats", "chickens", "pigs", "sheep")

psmList <- list(
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Ca"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Mg"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="pH"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Total.C"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Total.N"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="chemical_fertilizer_5"),
    list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Exch.Al"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="compost_fertilizer_5"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="legum_maincrop_5"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="legum_intercrop_5"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="logFert"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="dap.acre"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="can.acre"),
  list(tr = "tenure.tiers",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="compost.acre")
)

#lime_fertilizer_5 >> so few farmers use it that we get a bad match.


# PSM
set.seed(20161102)
mappend <- lapply(psmList, function(listInput){

  naCheck <- unlist(strsplit(gsub("\\+", "", as.vector(listInput$psmVars))," ", fixed=TRUE))
  naCheck <- naCheck[-which(naCheck=="")]
  
  
  # keep complete cases of outcome variable
  k <- d[complete.cases(d[,naCheck]),]
  k <- k[complete.cases(k[,listInput$y]),]
  k <- k[complete.cases(k[,listInput$tr]),]
  
  # run glm regression:
  reg <- glm(as.formula(paste(listInput$tr, "~", listInput$psmVars, sep="")),  family= binomial(link="logit"), data=k)
  
  suppressWarnings(
  mod <- Match(Y = k[,listInput$y], Tr = k[,listInput$tr], X = reg$fitted.values, ties=FALSE, replace=FALSE, caliper=0.25, estimand = "ATE")    
  )
  matchRes <- MatchBalance(k[,listInput$tr] ~ k[,listInput$y], match.out=mod, nboots=500, data=k, print.level = 0)
  #print(listInput$y)
  return(list(mod, matchRes))
  
})

The models can now vary by outcome. Let’s see if we can improve our results. (appendix)

matchRes <- do.call(rbind, lapply(1:length(mappend), function(model){
  val <- as.data.frame(cbind(
    standard.diff=mappend[[model]][[2]]$AfterMatching[[1]]$sdiff, 
    var.ratio = mappend[[model]][[2]]$AfterMatching[[1]]$var.ratio,
    sdiff.adj = mappend[[model]][[2]]$AfterMatching[[1]]$sdiff/100))
  return(val)
}))

namesInput <- NULL
for(i in 1:length(psmList)){
  namesInput[i] <- psmList[[i]]$y
}
rownames(matchRes) <- namesInput
print(kable(matchRes))
standard.diff var.ratio sdiff.adj
Ca 0.9238112 1.2218850 0.0092381
Mg -15.5729015 0.9676569 -0.1557290
pH 4.9974209 0.8986447 0.0499742
Total.C -22.4745136 1.1666893 -0.2247451
Total.N -31.0867873 1.0640974 -0.3108679
chemical_fertilizer_5 31.9321294 0.7947044 0.3193213
Exch.Al 2.2636258 3.1970113 0.0226363
compost_fertilizer_5 17.1447816 1.0188232 0.1714478
legum_maincrop_5 11.9680100 1.2999763 0.1196801
legum_intercrop_5 -14.7087869 0.9302600 -0.1470879
logFert 28.9243301 0.7743529 0.2892433
dap.acre -39.2628790 0.5683184 -0.3926288
can.acre -70.1928722 0.2701376 -0.7019287
compost.acre -5.4679030 0.1045200 -0.0546790
write.csv(matchRes, file=paste(od, "ke psm performance-appendix.csv", sep = "/"), row.names=T)

So few farmers use lime fertilizer that we don’t get a good match. I could make it binary but it’s so overwhelmingly the case that none of the farmers use lime that I’m just going to drop it.

We also don’t get super great matches on DAP and CAN. I’m going to push forward with this but I’m going to push forward with the results. We should flag this.

coefTable.append <- do.call(rbind, lapply(1:length(mappend), function(model){
  beta = round(mappend[[model]][[1]]$est.noadj,3)
  mean.Tr = round(mappend[[model]][[2]]$AfterMatching[[1]][[3]], 2)
  mean.Co = round(mappend[[model]][[2]]$AfterMatching[[1]][[4]], 2)
  pval = mappend[[model]][[2]]$AfterMatching[[1]][[10]][[3]] # p.value
  #pval = (1 - pnorm(abs(m[[model]][[1]]$est/m[[model]][[1]]$se.standard))) * 2
  pval = ifelse(pval < 0.001, "0.001", round(pval, 3))
  
  res = data.frame(beta, mean.Tr, mean.Co, pval, stringsAsFactors = F)
  return(res)
}))
row.names(coefTable.append) <- namesInput
coefTable.append$pval.adj <- round(p.adjust(coefTable.append$pval, method="fdr"),3)
print(kable(coefTable.append))
beta mean.Tr mean.Co pval pval.adj
Ca 4.608 906.47 901.87 0.9 0.900
Mg -12.895 156.10 169.00 0.051 0.089
pH 0.020 5.80 5.78 0.542 0.690
Total.C -0.122 2.03 2.15 0.003 0.007
Total.N -0.013 0.14 0.16 0.001 0.003
chemical_fertilizer_5 0.568 3.80 3.23 0.001 0.003
Exch.Al 0.004 0.14 0.13 0.725 0.823
compost_fertilizer_5 0.357 2.08 1.73 0.027 0.054
legum_maincrop_5 0.095 0.28 0.19 0.11 0.154
legum_intercrop_5 -0.295 2.65 2.95 0.058 0.090
logFert 0.171 1.44 1.27 0.001 0.003
dap.acre -9.527 46.24 55.77 0.001 0.003
can.acre -12.420 39.16 51.58 0.001 0.003
compost.acre -69.499 351.15 420.64 0.764 0.823
write.csv(coefTable.append, file=paste(od, "ke psm coefficients - appendix.csv", sep = "/"),row.names = T)
# sort by the order Eric wants
t7order.a <- c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")
table7vars.a <- paste(t7order.a, collapse = "|")
table7.a <- coefTable.append[grep(table7vars.a, rownames(coefTable.append)), ]
table7.a <- table7.a[order(match(rownames(table7.a), t7order.a)),]

write.csv(table7.a, file=paste(od, "table7-appendix.csv", sep = "/"), row.names = T)

# 11/17 added lime
t8order.a <- c("chemical_fertilizer_5", "compost_fertilizer_5", "legum_maincrop_5",
             "legum_intercrop_5")
table8vars.a <- paste(t8order.a, collapse = "|")
table8.a <- coefTable.append[grep(table8vars.a, rownames(coefTable.append)), ]
table8.a <- table8.a[order(match(rownames(table8.a), t8order.a)),]

write.csv(table8.a, file=paste(od, "table8-appendix.csv", sep = "/"), row.names = T)

#table 3
t9order.a <- c("dap.acre", "can.acre", "compost.acre")
table9vars.a <- paste(t9order.a, collapse = "|")
table9.a <- coefTable.append[grep(table9vars.a, rownames(coefTable.append)), ]
table9.a <- table9.a[order(match(rownames(table9.a), t9order.a)),]
write.csv(table9.a, file=paste(od, "table9-appendix.csv", sep = "/"))
thresh <- d %>% group_by(oaf) %>% dplyr::summarize(
  count = n(),
  ph = sum(pH<5.8),
  carbon = sum(Total.C < 2),
  nitrogen = sum(Total.N < 0.1),
  calcium = sum(Ca < 720),
  magnesium = sum(Mg < 100)
  #aluminum = sum(ExAl)
) %>% mutate(
  under.ph = paste(paste(round(ph/count,4)*100, "%", sep=""), " (", ph, ")", sep=""),
  under.carbon = paste(paste(round(carbon/count,4)*100,"%", sep=""), " (", carbon, ")", sep=""),
  under.nitrogen = paste(paste(round(nitrogen/count,4)*100, "%", sep=""), " (", nitrogen, ")", sep=""),
  under.calcium = paste(paste(round(calcium/count,4)*100, "%", sep=""), " (", calcium, ")", sep=""),
  under.mag = paste(paste(round(magnesium/count,4)*100,"%", sep=""), " (", magnesium, ")", sep="")
) %>% as.data.frame() 

thresh <- thresh[, c("oaf", names(thresh)[grep("under", names(thresh))])]
thresh <- t(thresh)
colnames(thresh) = thresh[1, ] # the first row will be the header
colnames(thresh) = c("non-client", "client")
thresh = thresh[-1, ]

write.csv(thresh, file=paste(od, "table1_rw_thresholds.csv", sep = "/"), row.names = T)

OAF tenure model - table 14 with PSM

tenureTab.add <- lapply(1:length(m), function(model){
  
  dm <- as.data.frame(rbind(d[m[[model]][[1]]$index.treated,], 
              d[m[[model]][[1]]$index.control,]))
  dm$client_tenure <- dm$oaf*dm$seasons_oaf
  mod <- lm(as.formula(paste("dm[,psmList[[model]]$y] ~", "seasons_oaf + as.factor(SiteName)", sep ="")), data=dm)
  return(mod)
})

modNames <- unlist(lapply(psmList, function(x){
  return(x$y)
}))
plm.tenure <- function(x){
  
  intercept = round(x$coefficients[[1]],3)
  beta = round(x$coefficients[[2]],3)
  int.pval = summary(x)$coefficients[1,4]
  int.pval = ifelse(int.pval < 0.001, "< 0.001", round(as.numeric(int.pval),3))
  beta.pval = summary(x)$coefficients[2,4]
  beta.pval = ifelse(beta.pval < 0.001, "< 0.001", round(as.numeric(beta.pval),3))
  res = data.frame(intercept, int.pval, beta, beta.pval, stringsAsFactors = F)
  
  return(res)  
}

tenure.reg <- do.call(rbind, lapply(tenureTab.add, function(x){
  plm.tenure(x)
}))

rownames(tenure.reg) <- modNames
t14order <- c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al", "chemical_fertilizer_5",
             "logFert", "compost_fertilizer_5", "legum_maincrop_5", "n_season_fallow")
t14vars <- paste(t14order, collapse = "|")
tenure.reg <- tenure.reg[grep(t14vars,rownames(tenure.reg)),]
tenure.reg <- tenure.reg[order(match(row.names(tenure.reg), t14order)),]

write.csv(tenure.reg, file=paste(od, "table14.csv", sep = "/"), row.names=T)

Mapping

Map of baseline observations

Produce a simple map of where our observations are

if (!(exists("kenya"))){
  # Only need to geocode once per session library(dismo)
  kenya <- try(geocode("Kenya"))
  # If the internet fails, use a local value 
  if (class(kenya) == "try-error") {
    kenya <- ""
  } 
}

See here for more on using markerClusterOptions in leaflet.

In the map below, the larger green circles are One Acre Fund farmers and the smaller blue circles are control farmers.

e <- d[!is.na(d$lon),]
ss <- SpatialPointsDataFrame(coords = e[, c("lon", "lat")], data=e)

pal <- colorNumeric(c("navy", "green"), domain=unique(ss$oaf))
map <- leaflet() %>% addTiles() %>%
  setView(lng=kenya$longitude, lat=kenya$latitude, zoom=6) %>%
  addCircleMarkers(lng=ss$lon, lat=ss$lat, 
                   radius= ifelse(ss$oaf==1, 10,6),
                   color = pal(ss$oaf),
clusterOptions = markerClusterOptions(disableClusteringAtZoom=12, spiderfyOnMaxZoom=FALSE))

map

Notes: We have a lot of GPS points piled on the Bungoma office. We need to address the GPS collection practices that lead to incorrectly logged GPS.

Static map of sampling density

crdref <- CRS('+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0')
e <- d[!is.na(d$lon),]
ss <- SpatialPointsDataFrame(coords = e[, c("lon", "lat")], data=e, proj4string = crdref)

ke <- raster::getData("GADM", country='KE', level=2, path = "/Users/mlowes/drive/optimized_agronomy/soil/soil_health_study/data") # the higher the number, the higher the resolution


#ext.rw.ss matches points with spatial polygons in ke
ext.ke.ss <- extract(ke[, "NAME_2"], ss)
## Loading required namespace: rgeos
ss$spatialname <- ext.ke.ss$NAME_2

fke <- fortify(ke, region="NAME_2")
ss.count <- ss@data %>% group_by(spatialname) %>% dplyr::summarize(
  count = n()
) %>% as.data.frame()

table(ss.count$spatialname %in% fke$id)
## 
## TRUE 
##   60
plotReady <- dplyr::left_join(fke, ss.count, by=c("id"="spatialname"))

Tip: Zoom in on subset of full map and filling polygons with base R plot

library(RColorBrewer)
mapRes <-  ggplot(plotReady, aes(x=long, y=lat, group=group)) + geom_path() + 
  geom_polygon(aes(fill=plotReady$count)) + 
  coord_map(xlim = c(33.5, 36),ylim = c(-2, 1.75)) +
  scale_fill_gradientn(colours = brewer.pal(9,"Reds"), # define colors
        name = "Sampling Density",
        guide = guide_colorbar(legend.direction = "vertical")) + 
  theme_bw() + 
  labs(title="Kenya soil health study sampling density", x = "Longitude", y="Latitude")
print(mapRes)

Without ggplot

plotCount <- merge(ke, ss.count, by.x="NAME_2", by.y="spatialname")
#colfunc <- colorRampPalette("red")
#plot(plotCount, col=plotCount@data$count, xlim = c(35, 36),ylim = c(-2, 1.75))
pdf(paste(od, "kenya sampling density.pdf", sep = "/"), width=11, height=8)
print(mapRes)
dev.off()
## quartz_off_screen 
##                 2

–end